notionary 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +44 -1
- notionary/blocks/client.py +37 -11
- notionary/blocks/rich_text/markdown_rich_text_converter.py +49 -15
- notionary/blocks/rich_text/models.py +13 -4
- notionary/blocks/rich_text/name_id_resolver/data_source.py +9 -3
- notionary/blocks/rich_text/name_id_resolver/person.py +6 -2
- notionary/blocks/rich_text/rich_text_markdown_converter.py +10 -3
- notionary/blocks/schemas.py +2 -1
- notionary/comments/client.py +19 -6
- notionary/comments/factory.py +10 -3
- notionary/comments/schemas.py +9 -3
- notionary/comments/service.py +12 -4
- notionary/data_source/http/data_source_instance_client.py +59 -17
- notionary/data_source/properties/schemas.py +30 -10
- notionary/data_source/query/builder.py +67 -18
- notionary/data_source/query/resolver.py +16 -5
- notionary/data_source/query/schema.py +24 -6
- notionary/data_source/query/validator.py +18 -6
- notionary/data_source/schema/registry.py +31 -12
- notionary/data_source/schema/service.py +66 -20
- notionary/data_source/service.py +74 -23
- notionary/database/client.py +27 -9
- notionary/database/database_metadata_update_client.py +12 -4
- notionary/database/service.py +11 -4
- notionary/exceptions/__init__.py +15 -3
- notionary/exceptions/block_parsing.py +6 -2
- notionary/exceptions/data_source/builder.py +11 -5
- notionary/exceptions/data_source/properties.py +3 -1
- notionary/exceptions/file_upload.py +12 -3
- notionary/exceptions/properties.py +3 -1
- notionary/exceptions/search.py +6 -2
- notionary/file_upload/client.py +5 -1
- notionary/file_upload/config/config.py +10 -3
- notionary/file_upload/query/builder.py +6 -2
- notionary/file_upload/schemas.py +3 -1
- notionary/file_upload/service.py +42 -14
- notionary/file_upload/validation/factory.py +3 -1
- notionary/file_upload/validation/impl/file_name_length.py +3 -1
- notionary/file_upload/validation/models.py +15 -5
- notionary/file_upload/validation/validators/file_extension.py +12 -3
- notionary/http/client.py +27 -8
- notionary/page/content/__init__.py +9 -0
- notionary/page/content/factory.py +21 -7
- notionary/page/content/markdown/builder.py +85 -23
- notionary/page/content/markdown/nodes/audio.py +8 -4
- notionary/page/content/markdown/nodes/base.py +3 -3
- notionary/page/content/markdown/nodes/bookmark.py +5 -3
- notionary/page/content/markdown/nodes/breadcrumb.py +2 -2
- notionary/page/content/markdown/nodes/bulleted_list.py +5 -3
- notionary/page/content/markdown/nodes/callout.py +2 -2
- notionary/page/content/markdown/nodes/code.py +5 -3
- notionary/page/content/markdown/nodes/columns.py +3 -3
- notionary/page/content/markdown/nodes/container.py +9 -5
- notionary/page/content/markdown/nodes/divider.py +2 -2
- notionary/page/content/markdown/nodes/embed.py +8 -4
- notionary/page/content/markdown/nodes/equation.py +4 -2
- notionary/page/content/markdown/nodes/file.py +8 -4
- notionary/page/content/markdown/nodes/heading.py +2 -2
- notionary/page/content/markdown/nodes/image.py +8 -4
- notionary/page/content/markdown/nodes/mixins/caption.py +5 -3
- notionary/page/content/markdown/nodes/numbered_list.py +5 -3
- notionary/page/content/markdown/nodes/paragraph.py +4 -2
- notionary/page/content/markdown/nodes/pdf.py +8 -4
- notionary/page/content/markdown/nodes/quote.py +2 -2
- notionary/page/content/markdown/nodes/space.py +2 -2
- notionary/page/content/markdown/nodes/table.py +8 -5
- notionary/page/content/markdown/nodes/table_of_contents.py +2 -2
- notionary/page/content/markdown/nodes/todo.py +15 -7
- notionary/page/content/markdown/nodes/toggle.py +2 -2
- notionary/page/content/markdown/nodes/video.py +8 -4
- notionary/page/content/markdown/structured_output/__init__.py +73 -0
- notionary/page/content/markdown/structured_output/models.py +391 -0
- notionary/page/content/markdown/structured_output/service.py +211 -0
- notionary/page/content/parser/context.py +1 -1
- notionary/page/content/parser/factory.py +23 -8
- notionary/page/content/parser/parsers/audio.py +7 -2
- notionary/page/content/parser/parsers/base.py +2 -2
- notionary/page/content/parser/parsers/bookmark.py +2 -2
- notionary/page/content/parser/parsers/breadcrumb.py +2 -2
- notionary/page/content/parser/parsers/bulleted_list.py +19 -6
- notionary/page/content/parser/parsers/callout.py +15 -5
- notionary/page/content/parser/parsers/caption.py +9 -3
- notionary/page/content/parser/parsers/code.py +21 -7
- notionary/page/content/parser/parsers/column.py +8 -4
- notionary/page/content/parser/parsers/column_list.py +19 -7
- notionary/page/content/parser/parsers/divider.py +2 -2
- notionary/page/content/parser/parsers/embed.py +2 -2
- notionary/page/content/parser/parsers/equation.py +8 -4
- notionary/page/content/parser/parsers/file.py +7 -2
- notionary/page/content/parser/parsers/file_like_block.py +30 -10
- notionary/page/content/parser/parsers/heading.py +31 -10
- notionary/page/content/parser/parsers/image.py +7 -2
- notionary/page/content/parser/parsers/numbered_list.py +18 -6
- notionary/page/content/parser/parsers/paragraph.py +3 -1
- notionary/page/content/parser/parsers/pdf.py +7 -2
- notionary/page/content/parser/parsers/quote.py +28 -9
- notionary/page/content/parser/parsers/space.py +2 -2
- notionary/page/content/parser/parsers/table.py +31 -10
- notionary/page/content/parser/parsers/table_of_contents.py +7 -3
- notionary/page/content/parser/parsers/todo.py +15 -5
- notionary/page/content/parser/parsers/toggle.py +15 -5
- notionary/page/content/parser/parsers/video.py +7 -2
- notionary/page/content/parser/post_processing/handlers/rich_text_length.py +8 -2
- notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +8 -2
- notionary/page/content/parser/post_processing/service.py +3 -1
- notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +21 -7
- notionary/page/content/parser/pre_processsing/handlers/indentation.py +11 -4
- notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +13 -6
- notionary/page/content/parser/service.py +4 -1
- notionary/page/content/renderer/context.py +15 -5
- notionary/page/content/renderer/factory.py +12 -6
- notionary/page/content/renderer/post_processing/handlers/numbered_list.py +19 -9
- notionary/page/content/renderer/renderers/audio.py +14 -5
- notionary/page/content/renderer/renderers/base.py +3 -3
- notionary/page/content/renderer/renderers/bookmark.py +3 -1
- notionary/page/content/renderer/renderers/bulleted_list.py +11 -5
- notionary/page/content/renderer/renderers/callout.py +19 -7
- notionary/page/content/renderer/renderers/captioned_block.py +11 -5
- notionary/page/content/renderer/renderers/code.py +6 -2
- notionary/page/content/renderer/renderers/column.py +3 -1
- notionary/page/content/renderer/renderers/column_list.py +3 -1
- notionary/page/content/renderer/renderers/embed.py +3 -1
- notionary/page/content/renderer/renderers/equation.py +3 -1
- notionary/page/content/renderer/renderers/file.py +14 -5
- notionary/page/content/renderer/renderers/file_like_block.py +8 -4
- notionary/page/content/renderer/renderers/heading.py +22 -8
- notionary/page/content/renderer/renderers/image.py +13 -4
- notionary/page/content/renderer/renderers/numbered_list.py +8 -3
- notionary/page/content/renderer/renderers/paragraph.py +12 -4
- notionary/page/content/renderer/renderers/pdf.py +14 -5
- notionary/page/content/renderer/renderers/quote.py +14 -6
- notionary/page/content/renderer/renderers/table.py +15 -5
- notionary/page/content/renderer/renderers/todo.py +16 -6
- notionary/page/content/renderer/renderers/toggle.py +8 -4
- notionary/page/content/renderer/renderers/video.py +14 -5
- notionary/page/content/renderer/service.py +9 -3
- notionary/page/content/service.py +21 -7
- notionary/page/content/syntax/definition/__init__.py +11 -0
- notionary/page/content/syntax/definition/models.py +57 -0
- notionary/page/content/syntax/definition/registry.py +371 -0
- notionary/page/content/syntax/prompts/__init__.py +4 -0
- notionary/page/content/syntax/prompts/models.py +11 -0
- notionary/page/content/syntax/prompts/registry.py +703 -0
- notionary/page/page_metadata_update_client.py +12 -4
- notionary/page/properties/client.py +45 -15
- notionary/page/properties/factory.py +6 -2
- notionary/page/properties/service.py +110 -36
- notionary/page/service.py +20 -6
- notionary/shared/entity/client.py +6 -2
- notionary/shared/entity/dto_parsers.py +3 -1
- notionary/shared/entity/entity_metadata_update_client.py +9 -3
- notionary/shared/entity/service.py +53 -22
- notionary/shared/models/file.py +3 -1
- notionary/user/base.py +6 -2
- notionary/user/bot.py +10 -2
- notionary/user/client.py +3 -1
- notionary/user/person.py +3 -1
- notionary/user/schemas.py +3 -1
- notionary/user/service.py +6 -2
- notionary/utils/decorators.py +6 -2
- notionary/utils/fuzzy.py +6 -2
- notionary/utils/mixins/logging.py +3 -1
- notionary/utils/pagination.py +14 -4
- notionary/workspace/__init__.py +5 -1
- notionary/workspace/query/service.py +59 -16
- notionary/workspace/service.py +39 -11
- {notionary-0.4.0.dist-info → notionary-0.4.1.dist-info}/METADATA +1 -1
- notionary-0.4.1.dist-info/RECORD +236 -0
- notionary/page/blocks/client.py +0 -1
- notionary/page/content/syntax/__init__.py +0 -5
- notionary/page/content/syntax/models.py +0 -66
- notionary/page/content/syntax/registry.py +0 -371
- notionary-0.4.0.dist-info/RECORD +0 -230
- /notionary/page/content/syntax/{grammar.py → definition/grammar.py} +0 -0
- {notionary-0.4.0.dist-info → notionary-0.4.1.dist-info}/WHEEL +0 -0
- {notionary-0.4.0.dist-info → notionary-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -26,7 +26,9 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
26
26
|
flattened_blocks = self._flatten_blocks(blocks)
|
|
27
27
|
return [self._process_block(block) for block in flattened_blocks]
|
|
28
28
|
|
|
29
|
-
def _flatten_blocks(
|
|
29
|
+
def _flatten_blocks(
|
|
30
|
+
self, blocks: list[_NestedBlockList]
|
|
31
|
+
) -> list[BlockCreatePayload]:
|
|
30
32
|
flattened: list[BlockCreatePayload] = []
|
|
31
33
|
|
|
32
34
|
for item in blocks:
|
|
@@ -92,4 +94,8 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
92
94
|
return content[:cutoff] + "..."
|
|
93
95
|
|
|
94
96
|
def _is_text_type(self, rich_text: RichText) -> bool:
|
|
95
|
-
return
|
|
97
|
+
return (
|
|
98
|
+
rich_text.type == RichTextType.TEXT
|
|
99
|
+
and rich_text.text
|
|
100
|
+
and rich_text.text.content
|
|
101
|
+
)
|
|
@@ -27,7 +27,9 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
27
27
|
flattened_blocks = self._flatten_blocks(blocks)
|
|
28
28
|
return [self._process_block(block) for block in flattened_blocks]
|
|
29
29
|
|
|
30
|
-
def _flatten_blocks(
|
|
30
|
+
def _flatten_blocks(
|
|
31
|
+
self, blocks: list[_NestedBlockList]
|
|
32
|
+
) -> list[BlockCreatePayload]:
|
|
31
33
|
flattened: list[BlockCreatePayload] = []
|
|
32
34
|
|
|
33
35
|
for item in blocks:
|
|
@@ -111,4 +113,8 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
111
113
|
return content[:cutoff] + self.ELLIPSIS
|
|
112
114
|
|
|
113
115
|
def _is_text_type(self, rich_text: RichText) -> bool:
|
|
114
|
-
return
|
|
116
|
+
return (
|
|
117
|
+
rich_text.type == RichTextType.TEXT
|
|
118
|
+
and rich_text.text is not None
|
|
119
|
+
and rich_text.text.content
|
|
120
|
+
)
|
|
@@ -9,7 +9,9 @@ class BlockPostProcessor:
|
|
|
9
9
|
def register(self, processor: PostProcessor) -> None:
|
|
10
10
|
self._processors.append(processor)
|
|
11
11
|
|
|
12
|
-
def process(
|
|
12
|
+
def process(
|
|
13
|
+
self, created_blocks: list[BlockCreatePayload]
|
|
14
|
+
) -> list[BlockCreatePayload]:
|
|
13
15
|
result = created_blocks
|
|
14
16
|
for processor in self._processors:
|
|
15
17
|
result = processor.process(created_blocks)
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
|
-
from notionary.exceptions.block_parsing import
|
|
4
|
+
from notionary.exceptions.block_parsing import (
|
|
5
|
+
InsufficientColumnsError,
|
|
6
|
+
InvalidColumnRatioSumError,
|
|
7
|
+
)
|
|
5
8
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
6
|
-
from notionary.page.content.syntax import
|
|
9
|
+
from notionary.page.content.syntax.definition import (
|
|
10
|
+
MarkdownGrammar,
|
|
11
|
+
SyntaxDefinitionRegistry,
|
|
12
|
+
)
|
|
7
13
|
from notionary.utils.decorators import time_execution_sync
|
|
8
14
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
9
15
|
|
|
@@ -13,15 +19,21 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
13
19
|
_MINIMUM_COLUMNS = 2
|
|
14
20
|
|
|
15
21
|
def __init__(
|
|
16
|
-
self,
|
|
22
|
+
self,
|
|
23
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
24
|
+
markdown_grammar: MarkdownGrammar | None = None,
|
|
17
25
|
) -> None:
|
|
18
26
|
super().__init__()
|
|
19
|
-
self._syntax_registry = syntax_registry or
|
|
27
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
20
28
|
self._markdown_grammar = markdown_grammar or MarkdownGrammar()
|
|
21
29
|
|
|
22
30
|
self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
|
|
23
|
-
self._column_list_delimiter =
|
|
24
|
-
|
|
31
|
+
self._column_list_delimiter = (
|
|
32
|
+
self._syntax_registry.get_column_list_syntax().start_delimiter
|
|
33
|
+
)
|
|
34
|
+
self._column_delimiter = (
|
|
35
|
+
self._syntax_registry.get_column_syntax().start_delimiter
|
|
36
|
+
)
|
|
25
37
|
self._column_pattern = self._syntax_registry.get_column_syntax().regex_pattern
|
|
26
38
|
|
|
27
39
|
@override
|
|
@@ -124,7 +136,9 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
124
136
|
total_ratio = sum(ratios)
|
|
125
137
|
|
|
126
138
|
if not self._is_ratio_sum_valid(total_ratio):
|
|
127
|
-
self.logger.error(
|
|
139
|
+
self.logger.error(
|
|
140
|
+
f"Column ratios must sum to 1.0 (±{self._RATIO_TOLERANCE}), but sum to {total_ratio:.4f}"
|
|
141
|
+
)
|
|
128
142
|
raise InvalidColumnRatioSumError(total_ratio, self._RATIO_TOLERANCE)
|
|
129
143
|
|
|
130
144
|
def _should_validate_ratios(self, ratios: list[float], column_count: int) -> bool:
|
|
@@ -2,21 +2,28 @@ import math
|
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
4
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
5
|
-
from notionary.page.content.syntax import
|
|
5
|
+
from notionary.page.content.syntax.definition import (
|
|
6
|
+
MarkdownGrammar,
|
|
7
|
+
SyntaxDefinitionRegistry,
|
|
8
|
+
)
|
|
6
9
|
from notionary.utils.decorators import time_execution_sync
|
|
7
10
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
class IndentationNormalizer(PreProcessor, LoggingMixin):
|
|
11
14
|
def __init__(
|
|
12
|
-
self,
|
|
15
|
+
self,
|
|
16
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
17
|
+
markdown_grammar: MarkdownGrammar | None = None,
|
|
13
18
|
) -> None:
|
|
14
19
|
super().__init__()
|
|
15
|
-
self._syntax_registry = syntax_registry or
|
|
20
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
16
21
|
self._markdown_grammar = markdown_grammar or MarkdownGrammar()
|
|
17
22
|
|
|
18
23
|
self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
|
|
19
|
-
self._code_block_start_delimiter =
|
|
24
|
+
self._code_block_start_delimiter = (
|
|
25
|
+
self._syntax_registry.get_code_syntax().start_delimiter
|
|
26
|
+
)
|
|
20
27
|
|
|
21
28
|
@override
|
|
22
29
|
@time_execution_sync()
|
|
@@ -5,18 +5,22 @@ from urllib.parse import urlparse
|
|
|
5
5
|
from notionary.blocks.enums import VideoFileType
|
|
6
6
|
from notionary.exceptions import UnsupportedVideoFormatError
|
|
7
7
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
8
|
-
from notionary.page.content.syntax import
|
|
8
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
9
|
from notionary.utils.decorators import time_execution_sync
|
|
10
10
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class VideoFormatPreProcessor(PreProcessor, LoggingMixin):
|
|
14
|
-
YOUTUBE_WATCH_PATTERN = re.compile(
|
|
15
|
-
|
|
14
|
+
YOUTUBE_WATCH_PATTERN = re.compile(
|
|
15
|
+
r"^https?://(?:www\.)?youtube\.com/watch\?.*v=[\w-]+", re.IGNORECASE
|
|
16
|
+
)
|
|
17
|
+
YOUTUBE_EMBED_PATTERN = re.compile(
|
|
18
|
+
r"^https?://(?:www\.)?youtube\.com/embed/[\w-]+", re.IGNORECASE
|
|
19
|
+
)
|
|
16
20
|
|
|
17
|
-
def __init__(self, syntax_registry:
|
|
21
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry | None = None) -> None:
|
|
18
22
|
super().__init__()
|
|
19
|
-
self._syntax_registry = syntax_registry or
|
|
23
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
20
24
|
self._video_syntax = self._syntax_registry.get_video_syntax()
|
|
21
25
|
|
|
22
26
|
@override
|
|
@@ -53,7 +57,10 @@ class VideoFormatPreProcessor(PreProcessor, LoggingMixin):
|
|
|
53
57
|
)
|
|
54
58
|
|
|
55
59
|
def _is_youtube_video(self, url: str) -> bool:
|
|
56
|
-
return bool(
|
|
60
|
+
return bool(
|
|
61
|
+
self.YOUTUBE_WATCH_PATTERN.match(url)
|
|
62
|
+
or self.YOUTUBE_EMBED_PATTERN.match(url)
|
|
63
|
+
)
|
|
57
64
|
|
|
58
65
|
def _has_valid_video_extension(self, url: str) -> bool:
|
|
59
66
|
return VideoFileType.is_valid_extension(url)
|
|
@@ -11,7 +11,10 @@ from notionary.utils.mixins.logging import LoggingMixin
|
|
|
11
11
|
|
|
12
12
|
class MarkdownToNotionConverter(LoggingMixin):
|
|
13
13
|
def __init__(
|
|
14
|
-
self,
|
|
14
|
+
self,
|
|
15
|
+
line_parser: LineParser,
|
|
16
|
+
pre_processor: MarkdownPreProcessor,
|
|
17
|
+
post_processor: BlockPostProcessor,
|
|
15
18
|
) -> None:
|
|
16
19
|
self._line_parser = line_parser
|
|
17
20
|
self._pre_processor = pre_processor
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from collections.abc import Awaitable, Callable
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.schemas import Block
|
|
4
|
-
from notionary.page.content.syntax.grammar import MarkdownGrammar
|
|
4
|
+
from notionary.page.content.syntax.definition.grammar import MarkdownGrammar
|
|
5
5
|
|
|
6
6
|
ConvertChildrenCallback = Callable[[list[Block], int], Awaitable[str]]
|
|
7
7
|
|
|
@@ -25,14 +25,20 @@ class MarkdownRenderingContext:
|
|
|
25
25
|
async def render_children(self) -> str:
|
|
26
26
|
return await self._convert_children_to_markdown(self.indent_level)
|
|
27
27
|
|
|
28
|
-
async def render_children_with_additional_indent(
|
|
29
|
-
|
|
28
|
+
async def render_children_with_additional_indent(
|
|
29
|
+
self, additional_indent: int
|
|
30
|
+
) -> str:
|
|
31
|
+
return await self._convert_children_to_markdown(
|
|
32
|
+
self.indent_level + additional_indent
|
|
33
|
+
)
|
|
30
34
|
|
|
31
35
|
async def _convert_children_to_markdown(self, indent_level: int) -> str:
|
|
32
36
|
if not self._has_children() or not self.convert_children_callback:
|
|
33
37
|
return ""
|
|
34
38
|
|
|
35
|
-
return await self.convert_children_callback(
|
|
39
|
+
return await self.convert_children_callback(
|
|
40
|
+
self._get_children_blocks(), indent_level
|
|
41
|
+
)
|
|
36
42
|
|
|
37
43
|
def _get_children_blocks(self) -> list[Block]:
|
|
38
44
|
if self._has_children():
|
|
@@ -40,7 +46,11 @@ class MarkdownRenderingContext:
|
|
|
40
46
|
return []
|
|
41
47
|
|
|
42
48
|
def _has_children(self) -> bool:
|
|
43
|
-
return
|
|
49
|
+
return (
|
|
50
|
+
self.block.has_children
|
|
51
|
+
and self.block.children
|
|
52
|
+
and len(self.block.children) > 0
|
|
53
|
+
)
|
|
44
54
|
|
|
45
55
|
def indent_text(self, text: str) -> str:
|
|
46
56
|
if not text:
|
|
@@ -29,17 +29,19 @@ from notionary.page.content.renderer.renderers import (
|
|
|
29
29
|
ToggleRenderer,
|
|
30
30
|
VideoRenderer,
|
|
31
31
|
)
|
|
32
|
-
from notionary.page.content.syntax import
|
|
32
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class RendererChainFactory:
|
|
36
36
|
def __init__(
|
|
37
37
|
self,
|
|
38
38
|
rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
|
|
39
|
-
syntax_registry:
|
|
39
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
40
40
|
) -> None:
|
|
41
|
-
self._rich_text_markdown_converter =
|
|
42
|
-
|
|
41
|
+
self._rich_text_markdown_converter = (
|
|
42
|
+
rich_text_markdown_converter or RichTextToMarkdownConverter()
|
|
43
|
+
)
|
|
44
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
43
45
|
|
|
44
46
|
def create(self) -> BlockRenderer:
|
|
45
47
|
# Strukturelle Blocks
|
|
@@ -219,13 +221,17 @@ class RendererChainFactory:
|
|
|
219
221
|
return BreadcrumbRenderer(syntax_registry=self._syntax_registry)
|
|
220
222
|
|
|
221
223
|
def _create_table_renderer(self) -> TableRenderer:
|
|
222
|
-
return TableRenderer(
|
|
224
|
+
return TableRenderer(
|
|
225
|
+
rich_text_markdown_converter=self._rich_text_markdown_converter
|
|
226
|
+
)
|
|
223
227
|
|
|
224
228
|
def _create_table_row_handler(self) -> TableRowHandler:
|
|
225
229
|
return TableRowHandler()
|
|
226
230
|
|
|
227
231
|
def _create_paragraph_renderer(self) -> ParagraphRenderer:
|
|
228
|
-
return ParagraphRenderer(
|
|
232
|
+
return ParagraphRenderer(
|
|
233
|
+
rich_text_markdown_converter=self._rich_text_markdown_converter
|
|
234
|
+
)
|
|
229
235
|
|
|
230
236
|
def _create_fallback_renderer(self) -> FallbackRenderer:
|
|
231
237
|
return FallbackRenderer()
|
|
@@ -3,7 +3,7 @@ from enum import IntEnum
|
|
|
3
3
|
from typing import override
|
|
4
4
|
|
|
5
5
|
from notionary.page.content.renderer.post_processing.port import PostProcessor
|
|
6
|
-
from notionary.page.content.syntax.grammar import MarkdownGrammar
|
|
6
|
+
from notionary.page.content.syntax.definition.grammar import MarkdownGrammar
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class _NumberingStyle(IntEnum):
|
|
@@ -89,7 +89,9 @@ class NumberedListPlaceholderReplacerPostProcessor(PostProcessor):
|
|
|
89
89
|
def __init__(self, markdown_grammar: MarkdownGrammar | None = None) -> None:
|
|
90
90
|
self._markdown_grammar = markdown_grammar or MarkdownGrammar()
|
|
91
91
|
self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
|
|
92
|
-
self._numbered_list_placeholder =
|
|
92
|
+
self._numbered_list_placeholder = (
|
|
93
|
+
self._markdown_grammar.numbered_list_placeholder
|
|
94
|
+
)
|
|
93
95
|
|
|
94
96
|
@override
|
|
95
97
|
def process(self, markdown_text: str) -> str:
|
|
@@ -130,23 +132,31 @@ class NumberedListPlaceholderReplacerPostProcessor(PostProcessor):
|
|
|
130
132
|
return match.group(1) if match else ""
|
|
131
133
|
|
|
132
134
|
def _extract_content(self, line: str) -> str:
|
|
133
|
-
match = re.match(
|
|
135
|
+
match = re.match(
|
|
136
|
+
rf"^\s*{re.escape(self._numbered_list_placeholder)}\.\s*(.*)", line
|
|
137
|
+
)
|
|
134
138
|
return match.group(1) if match else ""
|
|
135
139
|
|
|
136
140
|
def _is_placeholder_list_item(self, line: str) -> bool:
|
|
137
|
-
return bool(
|
|
141
|
+
return bool(
|
|
142
|
+
re.match(rf"^\s*{re.escape(self._numbered_list_placeholder)}\.", line)
|
|
143
|
+
)
|
|
138
144
|
|
|
139
|
-
def _is_blank_between_list_items(
|
|
145
|
+
def _is_blank_between_list_items(
|
|
146
|
+
self, lines: list[str], current_index: int, processed_lines: list[str]
|
|
147
|
+
) -> bool:
|
|
140
148
|
if not self._is_blank(lines[current_index]):
|
|
141
149
|
return False
|
|
142
150
|
|
|
143
|
-
previous_line_was_list_item =
|
|
151
|
+
previous_line_was_list_item = (
|
|
152
|
+
processed_lines and self._looks_like_numbered_list_item(processed_lines[-1])
|
|
153
|
+
)
|
|
144
154
|
if not previous_line_was_list_item:
|
|
145
155
|
return False
|
|
146
156
|
|
|
147
|
-
next_line_is_list_item = current_index + 1 < len(
|
|
148
|
-
lines
|
|
149
|
-
)
|
|
157
|
+
next_line_is_list_item = current_index + 1 < len(
|
|
158
|
+
lines
|
|
159
|
+
) and self._is_placeholder_list_item(lines[current_index + 1])
|
|
150
160
|
return next_line_is_list_item
|
|
151
161
|
|
|
152
162
|
def _is_blank(self, line: str) -> bool:
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from notionary.blocks.schemas import (
|
|
4
|
+
Block,
|
|
5
|
+
BlockType,
|
|
6
|
+
ExternalFileWithCaption,
|
|
7
|
+
NotionHostedFileWithCaption,
|
|
8
|
+
)
|
|
9
|
+
from notionary.page.content.renderer.renderers.file_like_block import (
|
|
10
|
+
FileLikeBlockRenderer,
|
|
11
|
+
)
|
|
12
|
+
from notionary.page.content.syntax.definition import EnclosedSyntaxDefinition
|
|
6
13
|
|
|
7
14
|
|
|
8
15
|
class AudioRenderer(FileLikeBlockRenderer):
|
|
@@ -11,9 +18,11 @@ class AudioRenderer(FileLikeBlockRenderer):
|
|
|
11
18
|
return block.type == BlockType.AUDIO
|
|
12
19
|
|
|
13
20
|
@override
|
|
14
|
-
def _get_syntax(self) ->
|
|
21
|
+
def _get_syntax(self) -> EnclosedSyntaxDefinition:
|
|
15
22
|
return self._syntax_registry.get_audio_syntax()
|
|
16
23
|
|
|
17
24
|
@override
|
|
18
|
-
def _get_file_data(
|
|
25
|
+
def _get_file_data(
|
|
26
|
+
self, block: Block
|
|
27
|
+
) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
28
|
return block.audio
|
|
@@ -4,12 +4,12 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
|
|
5
5
|
from notionary.blocks.schemas import Block
|
|
6
6
|
from notionary.page.content.renderer.context import MarkdownRenderingContext
|
|
7
|
-
from notionary.page.content.syntax import
|
|
7
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class BlockRenderer(ABC):
|
|
11
|
-
def __init__(self, syntax_registry:
|
|
12
|
-
self._syntax_registry = syntax_registry or
|
|
11
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry | None = None) -> None:
|
|
12
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
13
13
|
self._next_handler: BlockRenderer | None = None
|
|
14
14
|
|
|
15
15
|
def set_next(self, handler: BlockRenderer) -> BlockRenderer:
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.captioned_block import
|
|
4
|
+
from notionary.page.content.renderer.renderers.captioned_block import (
|
|
5
|
+
CaptionedBlockRenderer,
|
|
6
|
+
)
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
class BookmarkRenderer(CaptionedBlockRenderer):
|
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.rich_text.rich_text_markdown_converter import
|
|
3
|
+
from notionary.blocks.rich_text.rich_text_markdown_converter import (
|
|
4
|
+
RichTextToMarkdownConverter,
|
|
5
|
+
)
|
|
4
6
|
from notionary.blocks.schemas import Block, BlockType
|
|
5
7
|
from notionary.page.content.renderer.context import MarkdownRenderingContext
|
|
6
8
|
from notionary.page.content.renderer.renderers.base import BlockRenderer
|
|
7
|
-
from notionary.page.content.syntax import
|
|
9
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class BulletedListRenderer(BlockRenderer):
|
|
11
13
|
def __init__(
|
|
12
14
|
self,
|
|
13
|
-
syntax_registry:
|
|
15
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
14
16
|
rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
|
|
15
17
|
) -> None:
|
|
16
18
|
super().__init__(syntax_registry=syntax_registry)
|
|
17
|
-
self._rich_text_markdown_converter =
|
|
19
|
+
self._rich_text_markdown_converter = (
|
|
20
|
+
rich_text_markdown_converter or RichTextToMarkdownConverter()
|
|
21
|
+
)
|
|
18
22
|
|
|
19
23
|
@override
|
|
20
24
|
def _can_handle(self, block: Block) -> bool:
|
|
@@ -45,4 +49,6 @@ class BulletedListRenderer(BlockRenderer):
|
|
|
45
49
|
if not block.bulleted_list_item or not block.bulleted_list_item.rich_text:
|
|
46
50
|
return None
|
|
47
51
|
|
|
48
|
-
return await self._rich_text_markdown_converter.to_markdown(
|
|
52
|
+
return await self._rich_text_markdown_converter.to_markdown(
|
|
53
|
+
block.bulleted_list_item.rich_text
|
|
54
|
+
)
|
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.rich_text.rich_text_markdown_converter import
|
|
3
|
+
from notionary.blocks.rich_text.rich_text_markdown_converter import (
|
|
4
|
+
RichTextToMarkdownConverter,
|
|
5
|
+
)
|
|
4
6
|
from notionary.blocks.schemas import Block, BlockType
|
|
5
7
|
from notionary.page.content.renderer.context import MarkdownRenderingContext
|
|
6
8
|
from notionary.page.content.renderer.renderers.base import BlockRenderer
|
|
7
|
-
from notionary.page.content.syntax import
|
|
9
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class CalloutRenderer(BlockRenderer):
|
|
11
13
|
def __init__(
|
|
12
14
|
self,
|
|
13
|
-
syntax_registry:
|
|
15
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
14
16
|
rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
|
|
15
17
|
) -> None:
|
|
16
18
|
super().__init__(syntax_registry=syntax_registry)
|
|
17
|
-
self._rich_text_markdown_converter =
|
|
19
|
+
self._rich_text_markdown_converter = (
|
|
20
|
+
rich_text_markdown_converter or RichTextToMarkdownConverter()
|
|
21
|
+
)
|
|
18
22
|
|
|
19
23
|
@override
|
|
20
24
|
def _can_handle(self, block: Block) -> bool:
|
|
@@ -30,9 +34,15 @@ class CalloutRenderer(BlockRenderer):
|
|
|
30
34
|
|
|
31
35
|
icon = await self._extract_callout_icon(context.block)
|
|
32
36
|
|
|
33
|
-
callout_start_delimiter =
|
|
37
|
+
callout_start_delimiter = (
|
|
38
|
+
self._syntax_registry.get_callout_syntax().start_delimiter
|
|
39
|
+
)
|
|
34
40
|
|
|
35
|
-
result =
|
|
41
|
+
result = (
|
|
42
|
+
f'{callout_start_delimiter}({content} "{icon}")'
|
|
43
|
+
if icon
|
|
44
|
+
else f"{callout_start_delimiter}({content})"
|
|
45
|
+
)
|
|
36
46
|
|
|
37
47
|
if context.indent_level > 0:
|
|
38
48
|
result = context.indent_text(result)
|
|
@@ -47,4 +57,6 @@ class CalloutRenderer(BlockRenderer):
|
|
|
47
57
|
async def _extract_callout_content(self, block: Block) -> str:
|
|
48
58
|
if not block.callout or not block.callout.rich_text:
|
|
49
59
|
return ""
|
|
50
|
-
return await self._rich_text_markdown_converter.to_markdown(
|
|
60
|
+
return await self._rich_text_markdown_converter.to_markdown(
|
|
61
|
+
block.callout.rich_text
|
|
62
|
+
)
|
|
@@ -1,21 +1,25 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
|
-
from notionary.blocks.rich_text.rich_text_markdown_converter import
|
|
4
|
+
from notionary.blocks.rich_text.rich_text_markdown_converter import (
|
|
5
|
+
RichTextToMarkdownConverter,
|
|
6
|
+
)
|
|
5
7
|
from notionary.blocks.schemas import Block
|
|
6
8
|
from notionary.page.content.renderer.context import MarkdownRenderingContext
|
|
7
9
|
from notionary.page.content.renderer.renderers.base import BlockRenderer
|
|
8
|
-
from notionary.page.content.syntax import
|
|
10
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
class CaptionedBlockRenderer(BlockRenderer):
|
|
12
14
|
def __init__(
|
|
13
15
|
self,
|
|
14
|
-
syntax_registry:
|
|
16
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
15
17
|
rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
|
|
16
18
|
) -> None:
|
|
17
19
|
super().__init__(syntax_registry=syntax_registry)
|
|
18
|
-
self._rich_text_markdown_converter =
|
|
20
|
+
self._rich_text_markdown_converter = (
|
|
21
|
+
rich_text_markdown_converter or RichTextToMarkdownConverter()
|
|
22
|
+
)
|
|
19
23
|
|
|
20
24
|
@abstractmethod
|
|
21
25
|
async def _render_main_content(self, block: Block) -> str:
|
|
@@ -53,6 +57,8 @@ class CaptionedBlockRenderer(BlockRenderer):
|
|
|
53
57
|
if not caption_rich_text:
|
|
54
58
|
return ""
|
|
55
59
|
|
|
56
|
-
caption_markdown = await self._rich_text_markdown_converter.to_markdown(
|
|
60
|
+
caption_markdown = await self._rich_text_markdown_converter.to_markdown(
|
|
61
|
+
caption_rich_text
|
|
62
|
+
)
|
|
57
63
|
|
|
58
64
|
return f"\n[caption] {caption_markdown}"
|
|
@@ -2,7 +2,9 @@ from typing import override
|
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.enums import BlockType
|
|
4
4
|
from notionary.blocks.schemas import Block
|
|
5
|
-
from notionary.page.content.renderer.renderers.captioned_block import
|
|
5
|
+
from notionary.page.content.renderer.renderers.captioned_block import (
|
|
6
|
+
CaptionedBlockRenderer,
|
|
7
|
+
)
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
class CodeRenderer(CaptionedBlockRenderer):
|
|
@@ -31,4 +33,6 @@ class CodeRenderer(CaptionedBlockRenderer):
|
|
|
31
33
|
async def _extract_code_content(self, block: Block) -> str:
|
|
32
34
|
if not block.code or not block.code.rich_text:
|
|
33
35
|
return ""
|
|
34
|
-
return await self._rich_text_markdown_converter.to_markdown(
|
|
36
|
+
return await self._rich_text_markdown_converter.to_markdown(
|
|
37
|
+
block.code.rich_text
|
|
38
|
+
)
|
|
@@ -42,7 +42,9 @@ class ColumnRenderer(BlockRenderer):
|
|
|
42
42
|
|
|
43
43
|
return delimiter
|
|
44
44
|
|
|
45
|
-
async def _render_children_with_indentation(
|
|
45
|
+
async def _render_children_with_indentation(
|
|
46
|
+
self, context: MarkdownRenderingContext
|
|
47
|
+
) -> str:
|
|
46
48
|
original_indent = context.indent_level
|
|
47
49
|
context.indent_level += 1
|
|
48
50
|
|
|
@@ -33,7 +33,9 @@ class ColumnListRenderer(BlockRenderer):
|
|
|
33
33
|
def _get_column_list_delimiter(self) -> str:
|
|
34
34
|
return self._syntax_registry.get_column_list_syntax().start_delimiter
|
|
35
35
|
|
|
36
|
-
async def _render_children_with_indentation(
|
|
36
|
+
async def _render_children_with_indentation(
|
|
37
|
+
self, context: MarkdownRenderingContext
|
|
38
|
+
) -> str:
|
|
37
39
|
original_indent = context.indent_level
|
|
38
40
|
context.indent_level += 1
|
|
39
41
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.captioned_block import
|
|
4
|
+
from notionary.page.content.renderer.renderers.captioned_block import (
|
|
5
|
+
CaptionedBlockRenderer,
|
|
6
|
+
)
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
class EmbedRenderer(CaptionedBlockRenderer):
|
|
@@ -19,7 +19,9 @@ class EquationRenderer(BlockRenderer):
|
|
|
19
19
|
return
|
|
20
20
|
|
|
21
21
|
syntax = self._syntax_registry.get_equation_syntax()
|
|
22
|
-
equation_markdown =
|
|
22
|
+
equation_markdown = (
|
|
23
|
+
f"{syntax.start_delimiter}{expression}{syntax.end_delimiter}"
|
|
24
|
+
)
|
|
23
25
|
|
|
24
26
|
if context.indent_level > 0:
|
|
25
27
|
equation_markdown = context.indent_text(equation_markdown)
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from notionary.blocks.schemas import (
|
|
4
|
+
Block,
|
|
5
|
+
BlockType,
|
|
6
|
+
ExternalFileWithCaption,
|
|
7
|
+
NotionHostedFileWithCaption,
|
|
8
|
+
)
|
|
9
|
+
from notionary.page.content.renderer.renderers.file_like_block import (
|
|
10
|
+
FileLikeBlockRenderer,
|
|
11
|
+
)
|
|
12
|
+
from notionary.page.content.syntax.definition import EnclosedSyntaxDefinition
|
|
6
13
|
|
|
7
14
|
|
|
8
15
|
class FileRenderer(FileLikeBlockRenderer):
|
|
@@ -11,11 +18,13 @@ class FileRenderer(FileLikeBlockRenderer):
|
|
|
11
18
|
return block.type == BlockType.FILE
|
|
12
19
|
|
|
13
20
|
@override
|
|
14
|
-
def _get_syntax(self) ->
|
|
21
|
+
def _get_syntax(self) -> EnclosedSyntaxDefinition:
|
|
15
22
|
return self._syntax_registry.get_file_syntax()
|
|
16
23
|
|
|
17
24
|
@override
|
|
18
|
-
def _get_file_data(
|
|
25
|
+
def _get_file_data(
|
|
26
|
+
self, block: Block
|
|
27
|
+
) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
28
|
return block.file
|
|
20
29
|
|
|
21
30
|
def _extract_file_name(self, block: Block) -> str:
|