notionary 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +49 -1
- notionary/blocks/client.py +37 -11
- notionary/blocks/enums.py +0 -6
- notionary/blocks/rich_text/markdown_rich_text_converter.py +49 -15
- notionary/blocks/rich_text/models.py +13 -4
- notionary/blocks/rich_text/name_id_resolver/data_source.py +9 -3
- notionary/blocks/rich_text/name_id_resolver/person.py +6 -2
- notionary/blocks/rich_text/rich_text_markdown_converter.py +10 -3
- notionary/blocks/schemas.py +33 -78
- notionary/comments/client.py +19 -6
- notionary/comments/factory.py +10 -3
- notionary/comments/schemas.py +10 -31
- notionary/comments/service.py +12 -4
- notionary/data_source/http/data_source_instance_client.py +59 -17
- notionary/data_source/properties/schemas.py +156 -115
- notionary/data_source/query/builder.py +67 -18
- notionary/data_source/query/resolver.py +16 -5
- notionary/data_source/query/schema.py +24 -6
- notionary/data_source/query/validator.py +18 -6
- notionary/data_source/schema/registry.py +31 -12
- notionary/data_source/schema/service.py +66 -20
- notionary/data_source/schemas.py +2 -2
- notionary/data_source/service.py +103 -43
- notionary/database/client.py +27 -9
- notionary/database/database_metadata_update_client.py +12 -4
- notionary/database/schemas.py +2 -2
- notionary/database/service.py +14 -9
- notionary/exceptions/__init__.py +20 -4
- notionary/exceptions/api.py +2 -2
- notionary/exceptions/base.py +1 -1
- notionary/exceptions/block_parsing.py +9 -5
- notionary/exceptions/data_source/builder.py +13 -7
- notionary/exceptions/data_source/properties.py +6 -4
- notionary/exceptions/file_upload.py +76 -0
- notionary/exceptions/properties.py +7 -5
- notionary/exceptions/search.py +10 -6
- notionary/file_upload/__init__.py +4 -0
- notionary/file_upload/client.py +128 -210
- notionary/file_upload/config/__init__.py +17 -0
- notionary/file_upload/config/config.py +39 -0
- notionary/file_upload/config/constants.py +16 -0
- notionary/file_upload/file/reader.py +28 -0
- notionary/file_upload/query/__init__.py +7 -0
- notionary/file_upload/query/builder.py +58 -0
- notionary/file_upload/query/models.py +37 -0
- notionary/file_upload/schemas.py +80 -0
- notionary/file_upload/service.py +182 -291
- notionary/file_upload/validation/factory.py +66 -0
- notionary/file_upload/validation/impl/file_name_length.py +25 -0
- notionary/file_upload/validation/models.py +134 -0
- notionary/file_upload/validation/port.py +7 -0
- notionary/file_upload/validation/service.py +17 -0
- notionary/file_upload/validation/validators/__init__.py +11 -0
- notionary/file_upload/validation/validators/file_exists.py +15 -0
- notionary/file_upload/validation/validators/file_extension.py +131 -0
- notionary/file_upload/validation/validators/file_name_length.py +21 -0
- notionary/file_upload/validation/validators/upload_limit.py +31 -0
- notionary/http/client.py +33 -30
- notionary/page/content/__init__.py +9 -0
- notionary/page/content/factory.py +21 -7
- notionary/page/content/markdown/builder.py +85 -23
- notionary/page/content/markdown/nodes/audio.py +8 -4
- notionary/page/content/markdown/nodes/base.py +3 -3
- notionary/page/content/markdown/nodes/bookmark.py +5 -3
- notionary/page/content/markdown/nodes/breadcrumb.py +2 -2
- notionary/page/content/markdown/nodes/bulleted_list.py +5 -3
- notionary/page/content/markdown/nodes/callout.py +2 -2
- notionary/page/content/markdown/nodes/code.py +5 -3
- notionary/page/content/markdown/nodes/columns.py +3 -3
- notionary/page/content/markdown/nodes/container.py +9 -5
- notionary/page/content/markdown/nodes/divider.py +2 -2
- notionary/page/content/markdown/nodes/embed.py +8 -4
- notionary/page/content/markdown/nodes/equation.py +4 -2
- notionary/page/content/markdown/nodes/file.py +8 -4
- notionary/page/content/markdown/nodes/heading.py +2 -2
- notionary/page/content/markdown/nodes/image.py +8 -4
- notionary/page/content/markdown/nodes/mixins/caption.py +5 -3
- notionary/page/content/markdown/nodes/numbered_list.py +5 -3
- notionary/page/content/markdown/nodes/paragraph.py +4 -2
- notionary/page/content/markdown/nodes/pdf.py +8 -4
- notionary/page/content/markdown/nodes/quote.py +2 -2
- notionary/page/content/markdown/nodes/space.py +2 -2
- notionary/page/content/markdown/nodes/table.py +8 -5
- notionary/page/content/markdown/nodes/table_of_contents.py +2 -2
- notionary/page/content/markdown/nodes/todo.py +15 -7
- notionary/page/content/markdown/nodes/toggle.py +2 -2
- notionary/page/content/markdown/nodes/video.py +8 -4
- notionary/page/content/markdown/structured_output/__init__.py +73 -0
- notionary/page/content/markdown/structured_output/models.py +391 -0
- notionary/page/content/markdown/structured_output/service.py +211 -0
- notionary/page/content/parser/context.py +1 -1
- notionary/page/content/parser/factory.py +26 -8
- notionary/page/content/parser/parsers/audio.py +12 -32
- notionary/page/content/parser/parsers/base.py +2 -2
- notionary/page/content/parser/parsers/bookmark.py +2 -2
- notionary/page/content/parser/parsers/breadcrumb.py +2 -2
- notionary/page/content/parser/parsers/bulleted_list.py +19 -6
- notionary/page/content/parser/parsers/callout.py +15 -5
- notionary/page/content/parser/parsers/caption.py +9 -3
- notionary/page/content/parser/parsers/code.py +21 -7
- notionary/page/content/parser/parsers/column.py +8 -4
- notionary/page/content/parser/parsers/column_list.py +19 -7
- notionary/page/content/parser/parsers/divider.py +2 -2
- notionary/page/content/parser/parsers/embed.py +2 -4
- notionary/page/content/parser/parsers/equation.py +8 -4
- notionary/page/content/parser/parsers/file.py +12 -34
- notionary/page/content/parser/parsers/file_like_block.py +109 -0
- notionary/page/content/parser/parsers/heading.py +31 -10
- notionary/page/content/parser/parsers/image.py +12 -34
- notionary/page/content/parser/parsers/numbered_list.py +18 -6
- notionary/page/content/parser/parsers/paragraph.py +3 -1
- notionary/page/content/parser/parsers/pdf.py +12 -34
- notionary/page/content/parser/parsers/quote.py +28 -9
- notionary/page/content/parser/parsers/space.py +2 -2
- notionary/page/content/parser/parsers/table.py +31 -10
- notionary/page/content/parser/parsers/table_of_contents.py +7 -3
- notionary/page/content/parser/parsers/todo.py +15 -5
- notionary/page/content/parser/parsers/toggle.py +15 -5
- notionary/page/content/parser/parsers/video.py +12 -34
- notionary/page/content/parser/post_processing/handlers/rich_text_length.py +8 -2
- notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +8 -2
- notionary/page/content/parser/post_processing/service.py +3 -1
- notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +21 -7
- notionary/page/content/parser/pre_processsing/handlers/indentation.py +11 -4
- notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +13 -6
- notionary/page/content/parser/service.py +4 -1
- notionary/page/content/renderer/context.py +15 -5
- notionary/page/content/renderer/factory.py +12 -6
- notionary/page/content/renderer/post_processing/handlers/numbered_list.py +19 -9
- notionary/page/content/renderer/renderers/audio.py +20 -23
- notionary/page/content/renderer/renderers/base.py +3 -3
- notionary/page/content/renderer/renderers/bookmark.py +3 -1
- notionary/page/content/renderer/renderers/bulleted_list.py +11 -5
- notionary/page/content/renderer/renderers/callout.py +19 -7
- notionary/page/content/renderer/renderers/captioned_block.py +11 -5
- notionary/page/content/renderer/renderers/code.py +6 -2
- notionary/page/content/renderer/renderers/column.py +3 -1
- notionary/page/content/renderer/renderers/column_list.py +3 -1
- notionary/page/content/renderer/renderers/embed.py +3 -1
- notionary/page/content/renderer/renderers/equation.py +3 -1
- notionary/page/content/renderer/renderers/file.py +20 -23
- notionary/page/content/renderer/renderers/file_like_block.py +47 -0
- notionary/page/content/renderer/renderers/heading.py +22 -8
- notionary/page/content/renderer/renderers/image.py +20 -23
- notionary/page/content/renderer/renderers/numbered_list.py +8 -3
- notionary/page/content/renderer/renderers/paragraph.py +12 -4
- notionary/page/content/renderer/renderers/pdf.py +20 -23
- notionary/page/content/renderer/renderers/quote.py +14 -6
- notionary/page/content/renderer/renderers/table.py +15 -5
- notionary/page/content/renderer/renderers/todo.py +16 -6
- notionary/page/content/renderer/renderers/toggle.py +8 -4
- notionary/page/content/renderer/renderers/video.py +20 -23
- notionary/page/content/renderer/service.py +9 -3
- notionary/page/content/service.py +21 -7
- notionary/page/content/syntax/definition/__init__.py +11 -0
- notionary/page/content/syntax/definition/models.py +57 -0
- notionary/page/content/syntax/definition/registry.py +371 -0
- notionary/page/content/syntax/prompts/__init__.py +4 -0
- notionary/page/content/syntax/prompts/models.py +11 -0
- notionary/page/content/syntax/prompts/registry.py +703 -0
- notionary/page/page_metadata_update_client.py +12 -4
- notionary/page/properties/client.py +46 -16
- notionary/page/properties/factory.py +6 -2
- notionary/page/properties/{models.py → schemas.py} +93 -107
- notionary/page/properties/service.py +111 -37
- notionary/page/schemas.py +3 -3
- notionary/page/service.py +21 -7
- notionary/shared/entity/client.py +6 -2
- notionary/shared/entity/dto_parsers.py +4 -37
- notionary/shared/entity/entity_metadata_update_client.py +25 -5
- notionary/shared/entity/schemas.py +6 -6
- notionary/shared/entity/service.py +89 -35
- notionary/shared/models/file.py +36 -6
- notionary/shared/models/icon.py +5 -12
- notionary/user/base.py +6 -2
- notionary/user/bot.py +22 -14
- notionary/user/client.py +3 -1
- notionary/user/person.py +3 -1
- notionary/user/schemas.py +3 -1
- notionary/user/service.py +6 -2
- notionary/utils/decorators.py +13 -9
- notionary/utils/fuzzy.py +6 -2
- notionary/utils/mixins/logging.py +3 -1
- notionary/utils/pagination.py +14 -4
- notionary/workspace/__init__.py +6 -2
- notionary/workspace/query/__init__.py +2 -1
- notionary/workspace/query/service.py +42 -13
- notionary/workspace/service.py +74 -46
- {notionary-0.3.1.dist-info → notionary-0.4.1.dist-info}/METADATA +1 -1
- notionary-0.4.1.dist-info/RECORD +236 -0
- notionary/file_upload/models.py +0 -69
- notionary/page/blocks/client.py +0 -1
- notionary/page/content/syntax/__init__.py +0 -4
- notionary/page/content/syntax/models.py +0 -66
- notionary/page/content/syntax/registry.py +0 -393
- notionary/page/page_context.py +0 -50
- notionary/shared/models/cover.py +0 -20
- notionary-0.3.1.dist-info/RECORD +0 -211
- /notionary/page/content/syntax/{grammar.py → definition/grammar.py} +0 -0
- {notionary-0.3.1.dist-info → notionary-0.4.1.dist-info}/WHEEL +0 -0
- {notionary-0.3.1.dist-info → notionary-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,25 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.rich_text.markdown_rich_text_converter import
|
|
3
|
+
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
4
|
+
MarkdownRichTextConverter,
|
|
5
|
+
)
|
|
4
6
|
from notionary.blocks.rich_text.models import RichText
|
|
5
|
-
from notionary.blocks.schemas import
|
|
7
|
+
from notionary.blocks.schemas import (
|
|
8
|
+
CreateTableBlock,
|
|
9
|
+
CreateTableData,
|
|
10
|
+
CreateTableRowBlock,
|
|
11
|
+
TableRowData,
|
|
12
|
+
)
|
|
6
13
|
from notionary.page.content.parser.parsers import BlockParsingContext, LineParser
|
|
7
|
-
from notionary.page.content.syntax import
|
|
14
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
8
15
|
|
|
9
16
|
|
|
10
17
|
class TableParser(LineParser):
|
|
11
|
-
def __init__(
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
21
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
22
|
+
) -> None:
|
|
12
23
|
super().__init__(syntax_registry)
|
|
13
24
|
self._syntax = syntax_registry.get_table_syntax()
|
|
14
25
|
self._separator_syntax = syntax_registry.get_table_row_syntax()
|
|
@@ -41,7 +52,9 @@ class TableParser(LineParser):
|
|
|
41
52
|
context.lines_consumed = lines_consumed
|
|
42
53
|
context.result_blocks.append(block)
|
|
43
54
|
|
|
44
|
-
def _collect_table_lines(
|
|
55
|
+
def _collect_table_lines(
|
|
56
|
+
self, table_lines: list[str], remaining_lines: list[str]
|
|
57
|
+
) -> int:
|
|
45
58
|
lines_consumed = 0
|
|
46
59
|
|
|
47
60
|
for index, line in enumerate(remaining_lines):
|
|
@@ -62,9 +75,13 @@ class TableParser(LineParser):
|
|
|
62
75
|
return lines_consumed
|
|
63
76
|
|
|
64
77
|
def _is_table_line(self, line: str) -> bool:
|
|
65
|
-
return self._syntax.regex_pattern.match(
|
|
78
|
+
return self._syntax.regex_pattern.match(
|
|
79
|
+
line
|
|
80
|
+
) or self._separator_syntax.regex_pattern.match(line)
|
|
66
81
|
|
|
67
|
-
async def _create_table_block(
|
|
82
|
+
async def _create_table_block(
|
|
83
|
+
self, table_lines: list[str]
|
|
84
|
+
) -> CreateTableBlock | None:
|
|
68
85
|
if not table_lines:
|
|
69
86
|
return None
|
|
70
87
|
|
|
@@ -93,7 +110,9 @@ class TableParser(LineParser):
|
|
|
93
110
|
return line_stripped
|
|
94
111
|
return None
|
|
95
112
|
|
|
96
|
-
async def _process_table_rows(
|
|
113
|
+
async def _process_table_rows(
|
|
114
|
+
self, table_lines: list[str]
|
|
115
|
+
) -> tuple[list[CreateTableRowBlock], bool]:
|
|
97
116
|
table_rows = []
|
|
98
117
|
has_separator = False
|
|
99
118
|
|
|
@@ -122,7 +141,9 @@ class TableParser(LineParser):
|
|
|
122
141
|
table_row_data = TableRowData(cells=rich_text_cells)
|
|
123
142
|
return CreateTableRowBlock(table_row=table_row_data)
|
|
124
143
|
|
|
125
|
-
async def _convert_cells_to_rich_text(
|
|
144
|
+
async def _convert_cells_to_rich_text(
|
|
145
|
+
self, cells: list[str]
|
|
146
|
+
) -> list[list[RichText]]:
|
|
126
147
|
rich_text_cells = []
|
|
127
148
|
|
|
128
149
|
for cell in cells:
|
|
@@ -132,7 +153,7 @@ class TableParser(LineParser):
|
|
|
132
153
|
return rich_text_cells
|
|
133
154
|
|
|
134
155
|
def _parse_table_row(self, row_text: str) -> list[str]:
|
|
135
|
-
"""Parse a table row by splitting on the table delimiter from
|
|
156
|
+
"""Parse a table row by splitting on the table delimiter from SyntaxDefinitionRegistry."""
|
|
136
157
|
row_content = row_text.strip()
|
|
137
158
|
delimiter = self._syntax.start_delimiter
|
|
138
159
|
|
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import
|
|
3
|
+
from notionary.blocks.schemas import (
|
|
4
|
+
BlockColor,
|
|
5
|
+
CreateTableOfContentsBlock,
|
|
6
|
+
TableOfContentsData,
|
|
7
|
+
)
|
|
4
8
|
from notionary.page.content.parser.parsers.base import (
|
|
5
9
|
BlockParsingContext,
|
|
6
10
|
LineParser,
|
|
7
11
|
)
|
|
8
|
-
from notionary.page.content.syntax import
|
|
12
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class TableOfContentsParser(LineParser):
|
|
12
|
-
def __init__(self, syntax_registry:
|
|
16
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry) -> None:
|
|
13
17
|
super().__init__(syntax_registry)
|
|
14
18
|
self._syntax = syntax_registry.get_table_of_contents_syntax()
|
|
15
19
|
|
|
@@ -8,11 +8,15 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
8
8
|
BlockParsingContext,
|
|
9
9
|
LineParser,
|
|
10
10
|
)
|
|
11
|
-
from notionary.page.content.syntax import
|
|
11
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class TodoParser(LineParser):
|
|
15
|
-
def __init__(
|
|
15
|
+
def __init__(
|
|
16
|
+
self,
|
|
17
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
18
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
19
|
+
) -> None:
|
|
16
20
|
super().__init__(syntax_registry)
|
|
17
21
|
self._syntax = syntax_registry.get_todo_syntax()
|
|
18
22
|
self._syntax_done = syntax_registry.get_todo_done_syntax()
|
|
@@ -39,7 +43,9 @@ class TodoParser(LineParser):
|
|
|
39
43
|
await self._process_nested_children(block, context)
|
|
40
44
|
context.result_blocks.append(block)
|
|
41
45
|
|
|
42
|
-
async def _process_nested_children(
|
|
46
|
+
async def _process_nested_children(
|
|
47
|
+
self, block: CreateToDoBlock, context: BlockParsingContext
|
|
48
|
+
) -> None:
|
|
43
49
|
child_lines = self._collect_child_lines(context)
|
|
44
50
|
if not child_lines:
|
|
45
51
|
return
|
|
@@ -54,12 +60,16 @@ class TodoParser(LineParser):
|
|
|
54
60
|
parent_indent_level = context.get_line_indentation_level()
|
|
55
61
|
return context.collect_indented_child_lines(parent_indent_level)
|
|
56
62
|
|
|
57
|
-
async def _parse_child_blocks(
|
|
63
|
+
async def _parse_child_blocks(
|
|
64
|
+
self, child_lines: list[str], context: BlockParsingContext
|
|
65
|
+
) -> list[CreateToDoBlock]:
|
|
58
66
|
stripped_lines = self._remove_parent_indentation(child_lines, context)
|
|
59
67
|
children_text = self._convert_lines_to_text(stripped_lines)
|
|
60
68
|
return await context.parse_nested_markdown(children_text)
|
|
61
69
|
|
|
62
|
-
def _remove_parent_indentation(
|
|
70
|
+
def _remove_parent_indentation(
|
|
71
|
+
self, lines: list[str], context: BlockParsingContext
|
|
72
|
+
) -> list[str]:
|
|
63
73
|
return context.strip_indentation_level(lines, levels=1)
|
|
64
74
|
|
|
65
75
|
def _convert_lines_to_text(self, lines: list[str]) -> str:
|
|
@@ -1,16 +1,22 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.rich_text.markdown_rich_text_converter import
|
|
3
|
+
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
4
|
+
MarkdownRichTextConverter,
|
|
5
|
+
)
|
|
4
6
|
from notionary.blocks.schemas import BlockColor, CreateToggleBlock, CreateToggleData
|
|
5
7
|
from notionary.page.content.parser.parsers import (
|
|
6
8
|
BlockParsingContext,
|
|
7
9
|
LineParser,
|
|
8
10
|
)
|
|
9
|
-
from notionary.page.content.syntax import
|
|
11
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
class ToggleParser(LineParser):
|
|
13
|
-
def __init__(
|
|
15
|
+
def __init__(
|
|
16
|
+
self,
|
|
17
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
18
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
19
|
+
) -> None:
|
|
14
20
|
super().__init__(syntax_registry)
|
|
15
21
|
self._syntax = syntax_registry.get_toggle_syntax()
|
|
16
22
|
self._heading_syntax = syntax_registry.get_toggleable_heading_syntax()
|
|
@@ -51,10 +57,14 @@ class ToggleParser(LineParser):
|
|
|
51
57
|
title = match.group(1).strip()
|
|
52
58
|
rich_text = await self._rich_text_converter.to_rich_text(title)
|
|
53
59
|
|
|
54
|
-
toggle_content = CreateToggleData(
|
|
60
|
+
toggle_content = CreateToggleData(
|
|
61
|
+
rich_text=rich_text, color=BlockColor.DEFAULT, children=[]
|
|
62
|
+
)
|
|
55
63
|
return CreateToggleBlock(toggle=toggle_content)
|
|
56
64
|
|
|
57
|
-
async def _process_nested_children(
|
|
65
|
+
async def _process_nested_children(
|
|
66
|
+
self, block: CreateToggleBlock, context: BlockParsingContext
|
|
67
|
+
) -> None:
|
|
58
68
|
parent_indent_level = context.get_line_indentation_level()
|
|
59
69
|
child_lines = context.collect_indented_child_lines(parent_indent_level)
|
|
60
70
|
|
|
@@ -1,42 +1,20 @@
|
|
|
1
|
-
"""Parser for video blocks."""
|
|
2
|
-
|
|
3
1
|
from typing import override
|
|
4
2
|
|
|
5
|
-
from notionary.blocks.schemas import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
3
|
+
from notionary.blocks.schemas import CreateVideoBlock, ExternalFileWithCaption
|
|
4
|
+
from notionary.page.content.parser.parsers.file_like_block import FileLikeBlockParser
|
|
5
|
+
from notionary.page.content.syntax.definition import (
|
|
6
|
+
SyntaxDefinition,
|
|
7
|
+
SyntaxDefinitionRegistry,
|
|
10
8
|
)
|
|
11
|
-
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
12
|
-
from notionary.page.content.syntax import SyntaxRegistry
|
|
13
|
-
|
|
14
9
|
|
|
15
|
-
class VideoParser(LineParser):
|
|
16
|
-
def __init__(self, syntax_registry: SyntaxRegistry) -> None:
|
|
17
|
-
super().__init__(syntax_registry)
|
|
18
|
-
self._syntax = syntax_registry.get_video_syntax()
|
|
19
10
|
|
|
11
|
+
class VideoParser(FileLikeBlockParser[CreateVideoBlock]):
|
|
20
12
|
@override
|
|
21
|
-
def
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
return
|
|
13
|
+
def _get_syntax(
|
|
14
|
+
self, syntax_registry: SyntaxDefinitionRegistry
|
|
15
|
+
) -> SyntaxDefinition:
|
|
16
|
+
return syntax_registry.get_video_syntax()
|
|
25
17
|
|
|
26
18
|
@override
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
if not url:
|
|
30
|
-
return
|
|
31
|
-
|
|
32
|
-
video_data = FileData(
|
|
33
|
-
type=FileType.EXTERNAL,
|
|
34
|
-
external=ExternalFile(url=url),
|
|
35
|
-
caption=[],
|
|
36
|
-
)
|
|
37
|
-
block = CreateVideoBlock(video=video_data)
|
|
38
|
-
context.result_blocks.append(block)
|
|
39
|
-
|
|
40
|
-
def _extract_url(self, line: str) -> str | None:
|
|
41
|
-
match = self._syntax.regex_pattern.search(line)
|
|
42
|
-
return match.group(1).strip() if match else None
|
|
19
|
+
def _create_block(self, file_data: ExternalFileWithCaption) -> CreateVideoBlock:
|
|
20
|
+
return CreateVideoBlock(video=file_data)
|
|
@@ -26,7 +26,9 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
26
26
|
flattened_blocks = self._flatten_blocks(blocks)
|
|
27
27
|
return [self._process_block(block) for block in flattened_blocks]
|
|
28
28
|
|
|
29
|
-
def _flatten_blocks(
|
|
29
|
+
def _flatten_blocks(
|
|
30
|
+
self, blocks: list[_NestedBlockList]
|
|
31
|
+
) -> list[BlockCreatePayload]:
|
|
30
32
|
flattened: list[BlockCreatePayload] = []
|
|
31
33
|
|
|
32
34
|
for item in blocks:
|
|
@@ -92,4 +94,8 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
92
94
|
return content[:cutoff] + "..."
|
|
93
95
|
|
|
94
96
|
def _is_text_type(self, rich_text: RichText) -> bool:
|
|
95
|
-
return
|
|
97
|
+
return (
|
|
98
|
+
rich_text.type == RichTextType.TEXT
|
|
99
|
+
and rich_text.text
|
|
100
|
+
and rich_text.text.content
|
|
101
|
+
)
|
|
@@ -27,7 +27,9 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
27
27
|
flattened_blocks = self._flatten_blocks(blocks)
|
|
28
28
|
return [self._process_block(block) for block in flattened_blocks]
|
|
29
29
|
|
|
30
|
-
def _flatten_blocks(
|
|
30
|
+
def _flatten_blocks(
|
|
31
|
+
self, blocks: list[_NestedBlockList]
|
|
32
|
+
) -> list[BlockCreatePayload]:
|
|
31
33
|
flattened: list[BlockCreatePayload] = []
|
|
32
34
|
|
|
33
35
|
for item in blocks:
|
|
@@ -111,4 +113,8 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
111
113
|
return content[:cutoff] + self.ELLIPSIS
|
|
112
114
|
|
|
113
115
|
def _is_text_type(self, rich_text: RichText) -> bool:
|
|
114
|
-
return
|
|
116
|
+
return (
|
|
117
|
+
rich_text.type == RichTextType.TEXT
|
|
118
|
+
and rich_text.text is not None
|
|
119
|
+
and rich_text.text.content
|
|
120
|
+
)
|
|
@@ -9,7 +9,9 @@ class BlockPostProcessor:
|
|
|
9
9
|
def register(self, processor: PostProcessor) -> None:
|
|
10
10
|
self._processors.append(processor)
|
|
11
11
|
|
|
12
|
-
def process(
|
|
12
|
+
def process(
|
|
13
|
+
self, created_blocks: list[BlockCreatePayload]
|
|
14
|
+
) -> list[BlockCreatePayload]:
|
|
13
15
|
result = created_blocks
|
|
14
16
|
for processor in self._processors:
|
|
15
17
|
result = processor.process(created_blocks)
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
|
-
from notionary.exceptions.block_parsing import
|
|
4
|
+
from notionary.exceptions.block_parsing import (
|
|
5
|
+
InsufficientColumnsError,
|
|
6
|
+
InvalidColumnRatioSumError,
|
|
7
|
+
)
|
|
5
8
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
6
|
-
from notionary.page.content.syntax import
|
|
9
|
+
from notionary.page.content.syntax.definition import (
|
|
10
|
+
MarkdownGrammar,
|
|
11
|
+
SyntaxDefinitionRegistry,
|
|
12
|
+
)
|
|
7
13
|
from notionary.utils.decorators import time_execution_sync
|
|
8
14
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
9
15
|
|
|
@@ -13,15 +19,21 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
13
19
|
_MINIMUM_COLUMNS = 2
|
|
14
20
|
|
|
15
21
|
def __init__(
|
|
16
|
-
self,
|
|
22
|
+
self,
|
|
23
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
24
|
+
markdown_grammar: MarkdownGrammar | None = None,
|
|
17
25
|
) -> None:
|
|
18
26
|
super().__init__()
|
|
19
|
-
self._syntax_registry = syntax_registry or
|
|
27
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
20
28
|
self._markdown_grammar = markdown_grammar or MarkdownGrammar()
|
|
21
29
|
|
|
22
30
|
self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
|
|
23
|
-
self._column_list_delimiter =
|
|
24
|
-
|
|
31
|
+
self._column_list_delimiter = (
|
|
32
|
+
self._syntax_registry.get_column_list_syntax().start_delimiter
|
|
33
|
+
)
|
|
34
|
+
self._column_delimiter = (
|
|
35
|
+
self._syntax_registry.get_column_syntax().start_delimiter
|
|
36
|
+
)
|
|
25
37
|
self._column_pattern = self._syntax_registry.get_column_syntax().regex_pattern
|
|
26
38
|
|
|
27
39
|
@override
|
|
@@ -124,7 +136,9 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
124
136
|
total_ratio = sum(ratios)
|
|
125
137
|
|
|
126
138
|
if not self._is_ratio_sum_valid(total_ratio):
|
|
127
|
-
self.logger.error(
|
|
139
|
+
self.logger.error(
|
|
140
|
+
f"Column ratios must sum to 1.0 (±{self._RATIO_TOLERANCE}), but sum to {total_ratio:.4f}"
|
|
141
|
+
)
|
|
128
142
|
raise InvalidColumnRatioSumError(total_ratio, self._RATIO_TOLERANCE)
|
|
129
143
|
|
|
130
144
|
def _should_validate_ratios(self, ratios: list[float], column_count: int) -> bool:
|
|
@@ -2,21 +2,28 @@ import math
|
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
4
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
5
|
-
from notionary.page.content.syntax import
|
|
5
|
+
from notionary.page.content.syntax.definition import (
|
|
6
|
+
MarkdownGrammar,
|
|
7
|
+
SyntaxDefinitionRegistry,
|
|
8
|
+
)
|
|
6
9
|
from notionary.utils.decorators import time_execution_sync
|
|
7
10
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
class IndentationNormalizer(PreProcessor, LoggingMixin):
|
|
11
14
|
def __init__(
|
|
12
|
-
self,
|
|
15
|
+
self,
|
|
16
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
17
|
+
markdown_grammar: MarkdownGrammar | None = None,
|
|
13
18
|
) -> None:
|
|
14
19
|
super().__init__()
|
|
15
|
-
self._syntax_registry = syntax_registry or
|
|
20
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
16
21
|
self._markdown_grammar = markdown_grammar or MarkdownGrammar()
|
|
17
22
|
|
|
18
23
|
self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
|
|
19
|
-
self._code_block_start_delimiter =
|
|
24
|
+
self._code_block_start_delimiter = (
|
|
25
|
+
self._syntax_registry.get_code_syntax().start_delimiter
|
|
26
|
+
)
|
|
20
27
|
|
|
21
28
|
@override
|
|
22
29
|
@time_execution_sync()
|
|
@@ -5,18 +5,22 @@ from urllib.parse import urlparse
|
|
|
5
5
|
from notionary.blocks.enums import VideoFileType
|
|
6
6
|
from notionary.exceptions import UnsupportedVideoFormatError
|
|
7
7
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
8
|
-
from notionary.page.content.syntax import
|
|
8
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
9
|
from notionary.utils.decorators import time_execution_sync
|
|
10
10
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class VideoFormatPreProcessor(PreProcessor, LoggingMixin):
|
|
14
|
-
YOUTUBE_WATCH_PATTERN = re.compile(
|
|
15
|
-
|
|
14
|
+
YOUTUBE_WATCH_PATTERN = re.compile(
|
|
15
|
+
r"^https?://(?:www\.)?youtube\.com/watch\?.*v=[\w-]+", re.IGNORECASE
|
|
16
|
+
)
|
|
17
|
+
YOUTUBE_EMBED_PATTERN = re.compile(
|
|
18
|
+
r"^https?://(?:www\.)?youtube\.com/embed/[\w-]+", re.IGNORECASE
|
|
19
|
+
)
|
|
16
20
|
|
|
17
|
-
def __init__(self, syntax_registry:
|
|
21
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry | None = None) -> None:
|
|
18
22
|
super().__init__()
|
|
19
|
-
self._syntax_registry = syntax_registry or
|
|
23
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
20
24
|
self._video_syntax = self._syntax_registry.get_video_syntax()
|
|
21
25
|
|
|
22
26
|
@override
|
|
@@ -53,7 +57,10 @@ class VideoFormatPreProcessor(PreProcessor, LoggingMixin):
|
|
|
53
57
|
)
|
|
54
58
|
|
|
55
59
|
def _is_youtube_video(self, url: str) -> bool:
|
|
56
|
-
return bool(
|
|
60
|
+
return bool(
|
|
61
|
+
self.YOUTUBE_WATCH_PATTERN.match(url)
|
|
62
|
+
or self.YOUTUBE_EMBED_PATTERN.match(url)
|
|
63
|
+
)
|
|
57
64
|
|
|
58
65
|
def _has_valid_video_extension(self, url: str) -> bool:
|
|
59
66
|
return VideoFileType.is_valid_extension(url)
|
|
@@ -11,7 +11,10 @@ from notionary.utils.mixins.logging import LoggingMixin
|
|
|
11
11
|
|
|
12
12
|
class MarkdownToNotionConverter(LoggingMixin):
|
|
13
13
|
def __init__(
|
|
14
|
-
self,
|
|
14
|
+
self,
|
|
15
|
+
line_parser: LineParser,
|
|
16
|
+
pre_processor: MarkdownPreProcessor,
|
|
17
|
+
post_processor: BlockPostProcessor,
|
|
15
18
|
) -> None:
|
|
16
19
|
self._line_parser = line_parser
|
|
17
20
|
self._pre_processor = pre_processor
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from collections.abc import Awaitable, Callable
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.schemas import Block
|
|
4
|
-
from notionary.page.content.syntax.grammar import MarkdownGrammar
|
|
4
|
+
from notionary.page.content.syntax.definition.grammar import MarkdownGrammar
|
|
5
5
|
|
|
6
6
|
ConvertChildrenCallback = Callable[[list[Block], int], Awaitable[str]]
|
|
7
7
|
|
|
@@ -25,14 +25,20 @@ class MarkdownRenderingContext:
|
|
|
25
25
|
async def render_children(self) -> str:
|
|
26
26
|
return await self._convert_children_to_markdown(self.indent_level)
|
|
27
27
|
|
|
28
|
-
async def render_children_with_additional_indent(
|
|
29
|
-
|
|
28
|
+
async def render_children_with_additional_indent(
|
|
29
|
+
self, additional_indent: int
|
|
30
|
+
) -> str:
|
|
31
|
+
return await self._convert_children_to_markdown(
|
|
32
|
+
self.indent_level + additional_indent
|
|
33
|
+
)
|
|
30
34
|
|
|
31
35
|
async def _convert_children_to_markdown(self, indent_level: int) -> str:
|
|
32
36
|
if not self._has_children() or not self.convert_children_callback:
|
|
33
37
|
return ""
|
|
34
38
|
|
|
35
|
-
return await self.convert_children_callback(
|
|
39
|
+
return await self.convert_children_callback(
|
|
40
|
+
self._get_children_blocks(), indent_level
|
|
41
|
+
)
|
|
36
42
|
|
|
37
43
|
def _get_children_blocks(self) -> list[Block]:
|
|
38
44
|
if self._has_children():
|
|
@@ -40,7 +46,11 @@ class MarkdownRenderingContext:
|
|
|
40
46
|
return []
|
|
41
47
|
|
|
42
48
|
def _has_children(self) -> bool:
|
|
43
|
-
return
|
|
49
|
+
return (
|
|
50
|
+
self.block.has_children
|
|
51
|
+
and self.block.children
|
|
52
|
+
and len(self.block.children) > 0
|
|
53
|
+
)
|
|
44
54
|
|
|
45
55
|
def indent_text(self, text: str) -> str:
|
|
46
56
|
if not text:
|
|
@@ -29,17 +29,19 @@ from notionary.page.content.renderer.renderers import (
|
|
|
29
29
|
ToggleRenderer,
|
|
30
30
|
VideoRenderer,
|
|
31
31
|
)
|
|
32
|
-
from notionary.page.content.syntax import
|
|
32
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class RendererChainFactory:
|
|
36
36
|
def __init__(
|
|
37
37
|
self,
|
|
38
38
|
rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
|
|
39
|
-
syntax_registry:
|
|
39
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
40
40
|
) -> None:
|
|
41
|
-
self._rich_text_markdown_converter =
|
|
42
|
-
|
|
41
|
+
self._rich_text_markdown_converter = (
|
|
42
|
+
rich_text_markdown_converter or RichTextToMarkdownConverter()
|
|
43
|
+
)
|
|
44
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
43
45
|
|
|
44
46
|
def create(self) -> BlockRenderer:
|
|
45
47
|
# Strukturelle Blocks
|
|
@@ -219,13 +221,17 @@ class RendererChainFactory:
|
|
|
219
221
|
return BreadcrumbRenderer(syntax_registry=self._syntax_registry)
|
|
220
222
|
|
|
221
223
|
def _create_table_renderer(self) -> TableRenderer:
|
|
222
|
-
return TableRenderer(
|
|
224
|
+
return TableRenderer(
|
|
225
|
+
rich_text_markdown_converter=self._rich_text_markdown_converter
|
|
226
|
+
)
|
|
223
227
|
|
|
224
228
|
def _create_table_row_handler(self) -> TableRowHandler:
|
|
225
229
|
return TableRowHandler()
|
|
226
230
|
|
|
227
231
|
def _create_paragraph_renderer(self) -> ParagraphRenderer:
|
|
228
|
-
return ParagraphRenderer(
|
|
232
|
+
return ParagraphRenderer(
|
|
233
|
+
rich_text_markdown_converter=self._rich_text_markdown_converter
|
|
234
|
+
)
|
|
229
235
|
|
|
230
236
|
def _create_fallback_renderer(self) -> FallbackRenderer:
|
|
231
237
|
return FallbackRenderer()
|
|
@@ -3,7 +3,7 @@ from enum import IntEnum
|
|
|
3
3
|
from typing import override
|
|
4
4
|
|
|
5
5
|
from notionary.page.content.renderer.post_processing.port import PostProcessor
|
|
6
|
-
from notionary.page.content.syntax.grammar import MarkdownGrammar
|
|
6
|
+
from notionary.page.content.syntax.definition.grammar import MarkdownGrammar
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class _NumberingStyle(IntEnum):
|
|
@@ -89,7 +89,9 @@ class NumberedListPlaceholderReplacerPostProcessor(PostProcessor):
|
|
|
89
89
|
def __init__(self, markdown_grammar: MarkdownGrammar | None = None) -> None:
|
|
90
90
|
self._markdown_grammar = markdown_grammar or MarkdownGrammar()
|
|
91
91
|
self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
|
|
92
|
-
self._numbered_list_placeholder =
|
|
92
|
+
self._numbered_list_placeholder = (
|
|
93
|
+
self._markdown_grammar.numbered_list_placeholder
|
|
94
|
+
)
|
|
93
95
|
|
|
94
96
|
@override
|
|
95
97
|
def process(self, markdown_text: str) -> str:
|
|
@@ -130,23 +132,31 @@ class NumberedListPlaceholderReplacerPostProcessor(PostProcessor):
|
|
|
130
132
|
return match.group(1) if match else ""
|
|
131
133
|
|
|
132
134
|
def _extract_content(self, line: str) -> str:
|
|
133
|
-
match = re.match(
|
|
135
|
+
match = re.match(
|
|
136
|
+
rf"^\s*{re.escape(self._numbered_list_placeholder)}\.\s*(.*)", line
|
|
137
|
+
)
|
|
134
138
|
return match.group(1) if match else ""
|
|
135
139
|
|
|
136
140
|
def _is_placeholder_list_item(self, line: str) -> bool:
|
|
137
|
-
return bool(
|
|
141
|
+
return bool(
|
|
142
|
+
re.match(rf"^\s*{re.escape(self._numbered_list_placeholder)}\.", line)
|
|
143
|
+
)
|
|
138
144
|
|
|
139
|
-
def _is_blank_between_list_items(
|
|
145
|
+
def _is_blank_between_list_items(
|
|
146
|
+
self, lines: list[str], current_index: int, processed_lines: list[str]
|
|
147
|
+
) -> bool:
|
|
140
148
|
if not self._is_blank(lines[current_index]):
|
|
141
149
|
return False
|
|
142
150
|
|
|
143
|
-
previous_line_was_list_item =
|
|
151
|
+
previous_line_was_list_item = (
|
|
152
|
+
processed_lines and self._looks_like_numbered_list_item(processed_lines[-1])
|
|
153
|
+
)
|
|
144
154
|
if not previous_line_was_list_item:
|
|
145
155
|
return False
|
|
146
156
|
|
|
147
|
-
next_line_is_list_item = current_index + 1 < len(
|
|
148
|
-
lines
|
|
149
|
-
)
|
|
157
|
+
next_line_is_list_item = current_index + 1 < len(
|
|
158
|
+
lines
|
|
159
|
+
) and self._is_placeholder_list_item(lines[current_index + 1])
|
|
150
160
|
return next_line_is_list_item
|
|
151
161
|
|
|
152
162
|
def _is_blank(self, line: str) -> bool:
|
|
@@ -1,31 +1,28 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
from notionary.blocks.schemas import (
|
|
4
|
+
Block,
|
|
5
|
+
BlockType,
|
|
6
|
+
ExternalFileWithCaption,
|
|
7
|
+
NotionHostedFileWithCaption,
|
|
8
|
+
)
|
|
9
|
+
from notionary.page.content.renderer.renderers.file_like_block import (
|
|
10
|
+
FileLikeBlockRenderer,
|
|
11
|
+
)
|
|
12
|
+
from notionary.page.content.syntax.definition import EnclosedSyntaxDefinition
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AudioRenderer(FileLikeBlockRenderer):
|
|
8
16
|
@override
|
|
9
17
|
def _can_handle(self, block: Block) -> bool:
|
|
10
18
|
return block.type == BlockType.AUDIO
|
|
11
19
|
|
|
12
20
|
@override
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
if not url:
|
|
17
|
-
return ""
|
|
18
|
-
|
|
19
|
-
syntax = self._syntax_registry.get_audio_syntax()
|
|
20
|
-
return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
|
|
21
|
+
def _get_syntax(self) -> EnclosedSyntaxDefinition:
|
|
22
|
+
return self._syntax_registry.get_audio_syntax()
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
return block.audio.external.url or ""
|
|
28
|
-
elif block.audio.file:
|
|
29
|
-
return block.audio.file.url or ""
|
|
30
|
-
|
|
31
|
-
return ""
|
|
24
|
+
@override
|
|
25
|
+
def _get_file_data(
|
|
26
|
+
self, block: Block
|
|
27
|
+
) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
28
|
+
return block.audio
|