notionary 0.2.28__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +9 -2
- notionary/blocks/__init__.py +5 -0
- notionary/blocks/client.py +6 -4
- notionary/blocks/enums.py +28 -1
- notionary/blocks/rich_text/markdown_rich_text_converter.py +14 -0
- notionary/blocks/rich_text/models.py +14 -0
- notionary/blocks/rich_text/name_id_resolver/__init__.py +2 -0
- notionary/blocks/rich_text/name_id_resolver/data_source.py +32 -0
- notionary/blocks/rich_text/rich_text_markdown_converter.py +12 -0
- notionary/blocks/rich_text/rich_text_patterns.py +3 -0
- notionary/blocks/schemas.py +42 -10
- notionary/comments/__init__.py +5 -0
- notionary/comments/client.py +7 -10
- notionary/comments/factory.py +4 -6
- notionary/data_source/http/data_source_instance_client.py +14 -4
- notionary/data_source/properties/{models.py → schemas.py} +4 -8
- notionary/data_source/query/__init__.py +9 -0
- notionary/data_source/query/builder.py +38 -10
- notionary/data_source/query/schema.py +13 -10
- notionary/data_source/query/validator.py +11 -11
- notionary/data_source/schema/registry.py +104 -0
- notionary/data_source/schema/service.py +136 -0
- notionary/data_source/schemas.py +1 -1
- notionary/data_source/service.py +29 -103
- notionary/database/service.py +17 -60
- notionary/exceptions/__init__.py +5 -1
- notionary/exceptions/block_parsing.py +21 -0
- notionary/exceptions/search.py +24 -0
- notionary/http/client.py +9 -10
- notionary/http/models.py +5 -4
- notionary/page/content/factory.py +10 -3
- notionary/page/content/markdown/builder.py +76 -154
- notionary/page/content/markdown/nodes/__init__.py +0 -2
- notionary/page/content/markdown/nodes/audio.py +1 -1
- notionary/page/content/markdown/nodes/base.py +1 -1
- notionary/page/content/markdown/nodes/bookmark.py +1 -1
- notionary/page/content/markdown/nodes/breadcrumb.py +1 -1
- notionary/page/content/markdown/nodes/bulleted_list.py +31 -8
- notionary/page/content/markdown/nodes/callout.py +12 -10
- notionary/page/content/markdown/nodes/code.py +3 -5
- notionary/page/content/markdown/nodes/columns.py +39 -21
- notionary/page/content/markdown/nodes/container.py +64 -0
- notionary/page/content/markdown/nodes/divider.py +1 -1
- notionary/page/content/markdown/nodes/embed.py +1 -1
- notionary/page/content/markdown/nodes/equation.py +1 -1
- notionary/page/content/markdown/nodes/file.py +1 -1
- notionary/page/content/markdown/nodes/heading.py +26 -6
- notionary/page/content/markdown/nodes/image.py +1 -1
- notionary/page/content/markdown/nodes/mixins/__init__.py +5 -0
- notionary/page/content/markdown/nodes/mixins/caption.py +1 -1
- notionary/page/content/markdown/nodes/numbered_list.py +28 -5
- notionary/page/content/markdown/nodes/paragraph.py +1 -1
- notionary/page/content/markdown/nodes/pdf.py +1 -1
- notionary/page/content/markdown/nodes/quote.py +17 -5
- notionary/page/content/markdown/nodes/space.py +1 -1
- notionary/page/content/markdown/nodes/table.py +1 -1
- notionary/page/content/markdown/nodes/table_of_contents.py +1 -1
- notionary/page/content/markdown/nodes/todo.py +23 -7
- notionary/page/content/markdown/nodes/toggle.py +13 -14
- notionary/page/content/markdown/nodes/video.py +1 -1
- notionary/page/content/parser/context.py +98 -21
- notionary/page/content/parser/factory.py +1 -10
- notionary/page/content/parser/parsers/__init__.py +0 -2
- notionary/page/content/parser/parsers/audio.py +1 -1
- notionary/page/content/parser/parsers/base.py +1 -1
- notionary/page/content/parser/parsers/bookmark.py +1 -1
- notionary/page/content/parser/parsers/breadcrumb.py +1 -1
- notionary/page/content/parser/parsers/bulleted_list.py +52 -8
- notionary/page/content/parser/parsers/callout.py +55 -84
- notionary/page/content/parser/parsers/caption.py +1 -1
- notionary/page/content/parser/parsers/code.py +5 -5
- notionary/page/content/parser/parsers/column.py +23 -64
- notionary/page/content/parser/parsers/column_list.py +45 -45
- notionary/page/content/parser/parsers/divider.py +1 -1
- notionary/page/content/parser/parsers/embed.py +1 -1
- notionary/page/content/parser/parsers/equation.py +1 -1
- notionary/page/content/parser/parsers/file.py +1 -1
- notionary/page/content/parser/parsers/heading.py +65 -8
- notionary/page/content/parser/parsers/image.py +1 -1
- notionary/page/content/parser/parsers/numbered_list.py +52 -8
- notionary/page/content/parser/parsers/paragraph.py +3 -2
- notionary/page/content/parser/parsers/pdf.py +1 -1
- notionary/page/content/parser/parsers/quote.py +75 -15
- notionary/page/content/parser/parsers/space.py +14 -8
- notionary/page/content/parser/parsers/table.py +1 -1
- notionary/page/content/parser/parsers/table_of_contents.py +1 -1
- notionary/page/content/parser/parsers/todo.py +57 -19
- notionary/page/content/parser/parsers/toggle.py +17 -74
- notionary/page/content/parser/parsers/video.py +1 -1
- notionary/page/content/parser/post_processing/handlers/rich_text_length.py +6 -4
- notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +43 -22
- notionary/page/content/parser/pre_processsing/handlers/__init__.py +4 -0
- notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +108 -54
- notionary/page/content/parser/pre_processsing/handlers/indentation.py +86 -0
- notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +66 -0
- notionary/page/content/parser/pre_processsing/handlers/whitespace.py +14 -7
- notionary/page/content/parser/service.py +9 -0
- notionary/page/content/renderer/context.py +5 -2
- notionary/page/content/renderer/factory.py +2 -11
- notionary/page/content/renderer/post_processing/handlers/__init__.py +2 -2
- notionary/page/content/renderer/post_processing/handlers/numbered_list.py +156 -0
- notionary/page/content/renderer/renderers/__init__.py +0 -2
- notionary/page/content/renderer/renderers/base.py +1 -1
- notionary/page/content/renderer/renderers/bulleted_list.py +1 -1
- notionary/page/content/renderer/renderers/callout.py +6 -21
- notionary/page/content/renderer/renderers/captioned_block.py +1 -1
- notionary/page/content/renderer/renderers/column.py +28 -19
- notionary/page/content/renderer/renderers/column_list.py +24 -11
- notionary/page/content/renderer/renderers/heading.py +53 -27
- notionary/page/content/renderer/renderers/numbered_list.py +6 -5
- notionary/page/content/renderer/renderers/quote.py +1 -1
- notionary/page/content/renderer/renderers/todo.py +1 -1
- notionary/page/content/renderer/renderers/toggle.py +6 -7
- notionary/page/content/service.py +4 -1
- notionary/page/content/syntax/__init__.py +4 -0
- notionary/page/content/syntax/grammar.py +10 -0
- notionary/page/content/syntax/models.py +0 -2
- notionary/page/content/syntax/{service.py → registry.py} +31 -91
- notionary/page/properties/client.py +3 -3
- notionary/page/properties/models.py +3 -2
- notionary/page/properties/service.py +18 -3
- notionary/page/service.py +22 -80
- notionary/shared/entity/service.py +94 -36
- notionary/shared/models/cover.py +1 -1
- notionary/shared/typings.py +3 -0
- notionary/user/base.py +60 -11
- notionary/user/factory.py +0 -0
- notionary/utils/decorators.py +122 -0
- notionary/utils/fuzzy.py +18 -6
- notionary/utils/mixins/logging.py +38 -27
- notionary/utils/pagination.py +70 -16
- notionary/workspace/__init__.py +2 -1
- notionary/workspace/client.py +4 -2
- notionary/workspace/query/__init__.py +3 -0
- notionary/workspace/query/builder.py +25 -1
- notionary/workspace/query/models.py +12 -3
- notionary/workspace/query/service.py +57 -32
- notionary/workspace/service.py +31 -21
- {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/METADATA +35 -105
- notionary-0.3.1.dist-info/RECORD +211 -0
- notionary/page/content/markdown/nodes/toggleable_heading.py +0 -35
- notionary/page/content/parser/parsers/toggleable_heading.py +0 -150
- notionary/page/content/renderer/post_processing/handlers/numbered_list_placeholdere.py +0 -62
- notionary/page/content/renderer/renderers/toggleable_heading.py +0 -78
- notionary/utils/async_retry.py +0 -39
- notionary/utils/singleton.py +0 -13
- notionary-0.2.28.dist-info/RECORD +0 -200
- {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/WHEEL +0 -0
- {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -6,13 +6,13 @@ from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
|
6
6
|
from notionary.blocks.schemas import (
|
|
7
7
|
BlockColor,
|
|
8
8
|
CreateNumberedListItemBlock,
|
|
9
|
-
|
|
9
|
+
CreateNumberedListItemData,
|
|
10
10
|
)
|
|
11
11
|
from notionary.page.content.parser.parsers.base import (
|
|
12
12
|
BlockParsingContext,
|
|
13
13
|
LineParser,
|
|
14
14
|
)
|
|
15
|
-
from notionary.page.content.syntax
|
|
15
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class NumberedListParser(LineParser):
|
|
@@ -25,21 +25,65 @@ class NumberedListParser(LineParser):
|
|
|
25
25
|
def _can_handle(self, context: BlockParsingContext) -> bool:
|
|
26
26
|
if context.is_inside_parent_context():
|
|
27
27
|
return False
|
|
28
|
-
return self.
|
|
28
|
+
return self._is_numbered_list_line(context.line)
|
|
29
|
+
|
|
30
|
+
def _is_numbered_list_line(self, line: str) -> bool:
|
|
31
|
+
return self._syntax.regex_pattern.match(line) is not None
|
|
29
32
|
|
|
30
33
|
@override
|
|
31
34
|
async def _process(self, context: BlockParsingContext) -> None:
|
|
32
35
|
block = await self._create_numbered_list_block(context.line)
|
|
33
|
-
if block:
|
|
34
|
-
|
|
36
|
+
if not block:
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
await self._process_nested_children(block, context)
|
|
40
|
+
context.result_blocks.append(block)
|
|
41
|
+
|
|
42
|
+
async def _process_nested_children(self, block: CreateNumberedListItemBlock, context: BlockParsingContext) -> None:
|
|
43
|
+
child_lines = self._collect_child_lines(context)
|
|
44
|
+
if not child_lines:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
child_blocks = await self._parse_child_blocks(child_lines, context)
|
|
48
|
+
if child_blocks:
|
|
49
|
+
block.numbered_list_item.children = child_blocks
|
|
50
|
+
|
|
51
|
+
context.lines_consumed = len(child_lines)
|
|
52
|
+
|
|
53
|
+
def _collect_child_lines(self, context: BlockParsingContext) -> list[str]:
|
|
54
|
+
parent_indent_level = context.get_line_indentation_level()
|
|
55
|
+
return context.collect_indented_child_lines(parent_indent_level)
|
|
56
|
+
|
|
57
|
+
async def _parse_child_blocks(
|
|
58
|
+
self, child_lines: list[str], context: BlockParsingContext
|
|
59
|
+
) -> list[CreateNumberedListItemBlock]:
|
|
60
|
+
stripped_lines = self._remove_parent_indentation(child_lines, context)
|
|
61
|
+
children_text = self._convert_lines_to_text(stripped_lines)
|
|
62
|
+
return await context.parse_nested_markdown(children_text)
|
|
63
|
+
|
|
64
|
+
def _remove_parent_indentation(self, lines: list[str], context: BlockParsingContext) -> list[str]:
|
|
65
|
+
return context.strip_indentation_level(lines, levels=1)
|
|
66
|
+
|
|
67
|
+
def _convert_lines_to_text(self, lines: list[str]) -> str:
|
|
68
|
+
return "\n".join(lines)
|
|
35
69
|
|
|
36
70
|
async def _create_numbered_list_block(self, text: str) -> CreateNumberedListItemBlock | None:
|
|
71
|
+
content = self._extract_list_content(text)
|
|
72
|
+
if content is None:
|
|
73
|
+
return None
|
|
74
|
+
|
|
75
|
+
rich_text = await self._convert_to_rich_text(content)
|
|
76
|
+
return self._build_block(rich_text)
|
|
77
|
+
|
|
78
|
+
def _extract_list_content(self, text: str) -> str | None:
|
|
37
79
|
match = self._syntax.regex_pattern.match(text)
|
|
38
80
|
if not match:
|
|
39
81
|
return None
|
|
82
|
+
return match.group(3)
|
|
40
83
|
|
|
41
|
-
|
|
42
|
-
|
|
84
|
+
async def _convert_to_rich_text(self, content: str):
|
|
85
|
+
return await self._rich_text_converter.to_rich_text(content)
|
|
43
86
|
|
|
44
|
-
|
|
87
|
+
def _build_block(self, rich_text) -> CreateNumberedListItemBlock:
|
|
88
|
+
numbered_list_content = CreateNumberedListItemData(rich_text=rich_text, color=BlockColor.DEFAULT)
|
|
45
89
|
return CreateNumberedListItemBlock(numbered_list_item=numbered_list_content)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
+
from notionary.blocks.enums import BlockColor
|
|
3
4
|
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
4
5
|
MarkdownRichTextConverter,
|
|
5
6
|
)
|
|
6
|
-
from notionary.blocks.schemas import
|
|
7
|
+
from notionary.blocks.schemas import CreateParagraphBlock, CreateParagraphData
|
|
7
8
|
from notionary.page.content.parser.parsers.base import (
|
|
8
9
|
BlockParsingContext,
|
|
9
10
|
LineParser,
|
|
@@ -32,5 +33,5 @@ class ParagraphParser(LineParser):
|
|
|
32
33
|
return None
|
|
33
34
|
|
|
34
35
|
rich_text = await self._rich_text_converter.to_rich_text(text)
|
|
35
|
-
paragraph_content =
|
|
36
|
+
paragraph_content = CreateParagraphData(rich_text=rich_text, color=BlockColor.DEFAULT)
|
|
36
37
|
return CreateParagraphBlock(paragraph=paragraph_content)
|
|
@@ -9,7 +9,7 @@ from notionary.blocks.schemas import (
|
|
|
9
9
|
FileType,
|
|
10
10
|
)
|
|
11
11
|
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
12
|
-
from notionary.page.content.syntax
|
|
12
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PdfParser(LineParser):
|
|
@@ -6,7 +6,7 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
6
6
|
BlockParsingContext,
|
|
7
7
|
LineParser,
|
|
8
8
|
)
|
|
9
|
-
from notionary.page.content.syntax
|
|
9
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class QuoteParser(LineParser):
|
|
@@ -21,15 +21,22 @@ class QuoteParser(LineParser):
|
|
|
21
21
|
return False
|
|
22
22
|
return self._is_quote(context.line)
|
|
23
23
|
|
|
24
|
+
def _is_quote(self, line: str) -> bool:
|
|
25
|
+
return self._syntax.regex_pattern.match(line) is not None
|
|
26
|
+
|
|
24
27
|
@override
|
|
25
28
|
async def _process(self, context: BlockParsingContext) -> None:
|
|
26
29
|
quote_lines = self._collect_quote_lines(context)
|
|
27
|
-
lines_consumed = len(quote_lines)
|
|
28
30
|
|
|
29
31
|
block = await self._create_quote_block(quote_lines)
|
|
30
|
-
if block:
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
if not block:
|
|
33
|
+
return
|
|
34
|
+
|
|
35
|
+
# Lines consumed: all quote lines minus the current line (which is already being processed)
|
|
36
|
+
context.lines_consumed = len(quote_lines) - 1
|
|
37
|
+
|
|
38
|
+
await self._process_nested_children(block, context, quote_lines)
|
|
39
|
+
context.result_blocks.append(block)
|
|
33
40
|
|
|
34
41
|
def _collect_quote_lines(self, context: BlockParsingContext) -> list[str]:
|
|
35
42
|
quote_lines = [context.line]
|
|
@@ -39,27 +46,80 @@ class QuoteParser(LineParser):
|
|
|
39
46
|
quote_lines.append(line)
|
|
40
47
|
return quote_lines
|
|
41
48
|
|
|
42
|
-
def
|
|
43
|
-
|
|
49
|
+
async def _process_nested_children(
|
|
50
|
+
self, block: CreateQuoteBlock, context: BlockParsingContext, quote_lines: list[str]
|
|
51
|
+
) -> None:
|
|
52
|
+
# Calculate indent level after all quote lines
|
|
53
|
+
last_quote_line_index = len(quote_lines) - 1
|
|
54
|
+
child_lines = self._collect_child_lines_after_quote(context, last_quote_line_index)
|
|
55
|
+
|
|
56
|
+
if not child_lines:
|
|
57
|
+
return
|
|
58
|
+
|
|
59
|
+
child_blocks = await self._parse_child_blocks(child_lines, context)
|
|
60
|
+
if child_blocks:
|
|
61
|
+
block.quote.children = child_blocks
|
|
62
|
+
|
|
63
|
+
context.lines_consumed += len(child_lines)
|
|
64
|
+
|
|
65
|
+
def _collect_child_lines_after_quote(self, context: BlockParsingContext, last_quote_index: int) -> list[str]:
|
|
66
|
+
"""Collect indented children after the quote block."""
|
|
67
|
+
parent_indent_level = context.get_line_indentation_level()
|
|
68
|
+
remaining_lines = context.get_remaining_lines()
|
|
69
|
+
|
|
70
|
+
# Skip the quote lines we already processed
|
|
71
|
+
lines_after_quote = remaining_lines[last_quote_index:]
|
|
72
|
+
|
|
73
|
+
child_lines = []
|
|
74
|
+
expected_child_indent = parent_indent_level + 1
|
|
75
|
+
|
|
76
|
+
for line in lines_after_quote:
|
|
77
|
+
if not line.strip():
|
|
78
|
+
child_lines.append(line)
|
|
79
|
+
continue
|
|
80
|
+
|
|
81
|
+
line_indent = context.get_line_indentation_level(line)
|
|
82
|
+
if line_indent >= expected_child_indent:
|
|
83
|
+
child_lines.append(line)
|
|
84
|
+
else:
|
|
85
|
+
break
|
|
86
|
+
|
|
87
|
+
return child_lines
|
|
88
|
+
|
|
89
|
+
async def _parse_child_blocks(self, child_lines: list[str], context: BlockParsingContext) -> list[CreateQuoteBlock]:
|
|
90
|
+
stripped_lines = self._remove_parent_indentation(child_lines, context)
|
|
91
|
+
children_text = self._convert_lines_to_text(stripped_lines)
|
|
92
|
+
return await context.parse_nested_markdown(children_text)
|
|
93
|
+
|
|
94
|
+
def _remove_parent_indentation(self, lines: list[str], context: BlockParsingContext) -> list[str]:
|
|
95
|
+
return context.strip_indentation_level(lines, levels=1)
|
|
96
|
+
|
|
97
|
+
def _convert_lines_to_text(self, lines: list[str]) -> str:
|
|
98
|
+
return "\n".join(lines)
|
|
44
99
|
|
|
45
100
|
async def _create_quote_block(self, quote_lines: list[str]) -> CreateQuoteBlock | None:
|
|
46
|
-
|
|
101
|
+
contents = self._extract_quote_contents(quote_lines)
|
|
102
|
+
if not contents:
|
|
47
103
|
return None
|
|
48
104
|
|
|
105
|
+
content = self._join_contents_for_multiline_quote(contents)
|
|
106
|
+
rich_text = await self._convert_to_rich_text(content)
|
|
107
|
+
return self._build_block(rich_text)
|
|
108
|
+
|
|
109
|
+
def _extract_quote_contents(self, quote_lines: list[str]) -> list[str]:
|
|
49
110
|
contents = []
|
|
50
111
|
for line in quote_lines:
|
|
51
112
|
match = self._syntax.regex_pattern.match(line)
|
|
52
113
|
if match:
|
|
53
114
|
contents.append(match.group(1).strip())
|
|
115
|
+
return contents
|
|
54
116
|
|
|
55
|
-
|
|
56
|
-
|
|
117
|
+
def _join_contents_for_multiline_quote(self, contents: list[str]) -> str:
|
|
118
|
+
return "\n".join(contents)
|
|
57
119
|
|
|
58
|
-
|
|
120
|
+
async def _convert_to_rich_text(self, content: str):
|
|
121
|
+
return await self._rich_text_converter.to_rich_text(content)
|
|
59
122
|
|
|
60
|
-
|
|
123
|
+
def _build_block(self, rich_text) -> CreateQuoteBlock:
|
|
61
124
|
quote_data = CreateQuoteData(rich_text=rich_text, color=BlockColor.DEFAULT)
|
|
62
125
|
return CreateQuoteBlock(quote=quote_data)
|
|
63
|
-
|
|
64
|
-
def _join_contents_for_multiline_quote(self, contents: list[str]) -> str:
|
|
65
|
-
return "\n".join(contents)
|
|
@@ -1,19 +1,15 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.
|
|
3
|
+
from notionary.blocks.enums import BlockColor
|
|
4
|
+
from notionary.blocks.schemas import CreateParagraphBlock, CreateParagraphData
|
|
4
5
|
from notionary.page.content.parser.parsers.base import (
|
|
5
6
|
BlockParsingContext,
|
|
6
7
|
LineParser,
|
|
7
8
|
)
|
|
8
|
-
from notionary.page.content.syntax
|
|
9
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class SpaceParser(LineParser):
|
|
12
|
-
"""
|
|
13
|
-
Parser for [space] markers that create empty paragraph blocks.
|
|
14
|
-
Uses SyntaxRegistry for centralized syntax definition.
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
13
|
def __init__(self, syntax_registry: SyntaxRegistry) -> None:
|
|
18
14
|
super().__init__(syntax_registry)
|
|
19
15
|
self._syntax = syntax_registry.get_space_syntax()
|
|
@@ -22,8 +18,18 @@ class SpaceParser(LineParser):
|
|
|
22
18
|
def _can_handle(self, context: BlockParsingContext) -> bool:
|
|
23
19
|
if context.is_inside_parent_context():
|
|
24
20
|
return False
|
|
21
|
+
|
|
22
|
+
if self._is_explicit_space_marker(context):
|
|
23
|
+
return True
|
|
24
|
+
|
|
25
|
+
return self._is_second_consecutive_empty_line(context)
|
|
26
|
+
|
|
27
|
+
def _is_explicit_space_marker(self, context: BlockParsingContext) -> bool:
|
|
25
28
|
return self._syntax.regex_pattern.match(context.line.strip()) is not None
|
|
26
29
|
|
|
30
|
+
def _is_second_consecutive_empty_line(self, context: BlockParsingContext) -> bool:
|
|
31
|
+
return context.line.strip() == "" and context.is_previous_line_empty
|
|
32
|
+
|
|
27
33
|
@override
|
|
28
34
|
async def _process(self, context: BlockParsingContext) -> None:
|
|
29
35
|
block = self._create_space_block()
|
|
@@ -31,5 +37,5 @@ class SpaceParser(LineParser):
|
|
|
31
37
|
context.result_blocks.append(block)
|
|
32
38
|
|
|
33
39
|
def _create_space_block(self) -> CreateParagraphBlock:
|
|
34
|
-
paragraph_data =
|
|
40
|
+
paragraph_data = CreateParagraphData(rich_text=[], color=BlockColor.DEFAULT)
|
|
35
41
|
return CreateParagraphBlock(paragraph=paragraph_data)
|
|
@@ -4,7 +4,7 @@ from notionary.blocks.rich_text.markdown_rich_text_converter import MarkdownRich
|
|
|
4
4
|
from notionary.blocks.rich_text.models import RichText
|
|
5
5
|
from notionary.blocks.schemas import CreateTableBlock, CreateTableData, CreateTableRowBlock, TableRowData
|
|
6
6
|
from notionary.page.content.parser.parsers import BlockParsingContext, LineParser
|
|
7
|
-
from notionary.page.content.syntax
|
|
7
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class TableParser(LineParser):
|
|
@@ -5,7 +5,7 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
5
5
|
BlockParsingContext,
|
|
6
6
|
LineParser,
|
|
7
7
|
)
|
|
8
|
-
from notionary.page.content.syntax
|
|
8
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class TableOfContentsParser(LineParser):
|
|
@@ -1,16 +1,14 @@
|
|
|
1
|
-
"""Parser for todo/checkbox blocks."""
|
|
2
|
-
|
|
3
1
|
from typing import override
|
|
4
2
|
|
|
5
3
|
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
6
4
|
MarkdownRichTextConverter,
|
|
7
5
|
)
|
|
8
|
-
from notionary.blocks.schemas import BlockColor, CreateToDoBlock,
|
|
6
|
+
from notionary.blocks.schemas import BlockColor, CreateToDoBlock, CreateToDoData
|
|
9
7
|
from notionary.page.content.parser.parsers.base import (
|
|
10
8
|
BlockParsingContext,
|
|
11
9
|
LineParser,
|
|
12
10
|
)
|
|
13
|
-
from notionary.page.content.syntax
|
|
11
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
14
12
|
|
|
15
13
|
|
|
16
14
|
class TodoParser(LineParser):
|
|
@@ -24,33 +22,73 @@ class TodoParser(LineParser):
|
|
|
24
22
|
def _can_handle(self, context: BlockParsingContext) -> bool:
|
|
25
23
|
if context.is_inside_parent_context():
|
|
26
24
|
return False
|
|
25
|
+
return self._is_todo_line(context.line)
|
|
27
26
|
|
|
27
|
+
def _is_todo_line(self, line: str) -> bool:
|
|
28
28
|
return (
|
|
29
|
-
self._syntax.regex_pattern.match(
|
|
30
|
-
or self._syntax_done.regex_pattern.match(
|
|
29
|
+
self._syntax.regex_pattern.match(line) is not None
|
|
30
|
+
or self._syntax_done.regex_pattern.match(line) is not None
|
|
31
31
|
)
|
|
32
32
|
|
|
33
33
|
@override
|
|
34
34
|
async def _process(self, context: BlockParsingContext) -> None:
|
|
35
35
|
block = await self._create_todo_block(context.line)
|
|
36
|
-
if block:
|
|
37
|
-
|
|
36
|
+
if not block:
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
await self._process_nested_children(block, context)
|
|
40
|
+
context.result_blocks.append(block)
|
|
41
|
+
|
|
42
|
+
async def _process_nested_children(self, block: CreateToDoBlock, context: BlockParsingContext) -> None:
|
|
43
|
+
child_lines = self._collect_child_lines(context)
|
|
44
|
+
if not child_lines:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
child_blocks = await self._parse_child_blocks(child_lines, context)
|
|
48
|
+
if child_blocks:
|
|
49
|
+
block.to_do.children = child_blocks
|
|
50
|
+
|
|
51
|
+
context.lines_consumed = len(child_lines)
|
|
52
|
+
|
|
53
|
+
def _collect_child_lines(self, context: BlockParsingContext) -> list[str]:
|
|
54
|
+
parent_indent_level = context.get_line_indentation_level()
|
|
55
|
+
return context.collect_indented_child_lines(parent_indent_level)
|
|
56
|
+
|
|
57
|
+
async def _parse_child_blocks(self, child_lines: list[str], context: BlockParsingContext) -> list[CreateToDoBlock]:
|
|
58
|
+
stripped_lines = self._remove_parent_indentation(child_lines, context)
|
|
59
|
+
children_text = self._convert_lines_to_text(stripped_lines)
|
|
60
|
+
return await context.parse_nested_markdown(children_text)
|
|
61
|
+
|
|
62
|
+
def _remove_parent_indentation(self, lines: list[str], context: BlockParsingContext) -> list[str]:
|
|
63
|
+
return context.strip_indentation_level(lines, levels=1)
|
|
64
|
+
|
|
65
|
+
def _convert_lines_to_text(self, lines: list[str]) -> str:
|
|
66
|
+
return "\n".join(lines)
|
|
38
67
|
|
|
39
68
|
async def _create_todo_block(self, text: str) -> CreateToDoBlock | None:
|
|
40
|
-
|
|
41
|
-
|
|
69
|
+
content, checked = self._extract_todo_content(text)
|
|
70
|
+
if content is None:
|
|
71
|
+
return None
|
|
42
72
|
|
|
73
|
+
rich_text = await self._convert_to_rich_text(content)
|
|
74
|
+
return self._build_block(rich_text, checked)
|
|
75
|
+
|
|
76
|
+
def _extract_todo_content(self, text: str) -> tuple[str | None, bool]:
|
|
77
|
+
done_match = self._syntax_done.regex_pattern.match(text)
|
|
43
78
|
if done_match:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
79
|
+
return done_match.group(1), True
|
|
80
|
+
|
|
81
|
+
todo_match = self._syntax.regex_pattern.match(text)
|
|
82
|
+
if todo_match:
|
|
83
|
+
return todo_match.group(1), False
|
|
84
|
+
|
|
85
|
+
return None, False
|
|
86
|
+
|
|
87
|
+
async def _convert_to_rich_text(self, content: str):
|
|
88
|
+
return await self._rich_text_converter.to_rich_text(content)
|
|
51
89
|
|
|
52
|
-
|
|
53
|
-
todo_content =
|
|
90
|
+
def _build_block(self, rich_text, checked: bool) -> CreateToDoBlock:
|
|
91
|
+
todo_content = CreateToDoData(
|
|
54
92
|
rich_text=rich_text,
|
|
55
93
|
checked=checked,
|
|
56
94
|
color=BlockColor.DEFAULT,
|
|
@@ -5,9 +5,8 @@ from notionary.blocks.schemas import BlockColor, CreateToggleBlock, CreateToggle
|
|
|
5
5
|
from notionary.page.content.parser.parsers import (
|
|
6
6
|
BlockParsingContext,
|
|
7
7
|
LineParser,
|
|
8
|
-
ParentBlockContext,
|
|
9
8
|
)
|
|
10
|
-
from notionary.page.content.syntax
|
|
9
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
class ToggleParser(LineParser):
|
|
@@ -19,18 +18,12 @@ class ToggleParser(LineParser):
|
|
|
19
18
|
|
|
20
19
|
@override
|
|
21
20
|
def _can_handle(self, context: BlockParsingContext) -> bool:
|
|
22
|
-
return self._is_toggle_start(context)
|
|
21
|
+
return self._is_toggle_start(context)
|
|
23
22
|
|
|
24
23
|
@override
|
|
25
24
|
async def _process(self, context: BlockParsingContext) -> None:
|
|
26
25
|
if self._is_toggle_start(context):
|
|
27
|
-
await self.
|
|
28
|
-
|
|
29
|
-
if self._is_toggle_end(context):
|
|
30
|
-
await self._finalize_toggle(context)
|
|
31
|
-
|
|
32
|
-
if self._is_toggle_content(context):
|
|
33
|
-
self._add_toggle_content(context)
|
|
26
|
+
await self._process_toggle(context)
|
|
34
27
|
|
|
35
28
|
def _is_toggle_start(self, context: BlockParsingContext) -> bool:
|
|
36
29
|
if not self._syntax.regex_pattern.match(context.line):
|
|
@@ -42,26 +35,14 @@ class ToggleParser(LineParser):
|
|
|
42
35
|
def is_heading_start(self, line: str) -> bool:
|
|
43
36
|
return self._heading_syntax.regex_pattern.match(line) is not None
|
|
44
37
|
|
|
45
|
-
def
|
|
46
|
-
if not self._syntax.end_regex_pattern.match(context.line):
|
|
47
|
-
return False
|
|
48
|
-
|
|
49
|
-
if not context.parent_stack:
|
|
50
|
-
return False
|
|
51
|
-
|
|
52
|
-
current_parent = context.parent_stack[-1]
|
|
53
|
-
return isinstance(current_parent.block, CreateToggleBlock)
|
|
54
|
-
|
|
55
|
-
async def _start_toggle(self, context: BlockParsingContext) -> None:
|
|
38
|
+
async def _process_toggle(self, context: BlockParsingContext) -> None:
|
|
56
39
|
block = await self._create_toggle_block(context.line)
|
|
57
40
|
if not block:
|
|
58
41
|
return
|
|
59
42
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
)
|
|
64
|
-
context.parent_stack.append(parent_context)
|
|
43
|
+
await self._process_nested_children(block, context)
|
|
44
|
+
|
|
45
|
+
context.result_blocks.append(block)
|
|
65
46
|
|
|
66
47
|
async def _create_toggle_block(self, line: str) -> CreateToggleBlock | None:
|
|
67
48
|
if not (match := self._syntax.regex_pattern.match(line)):
|
|
@@ -73,55 +54,17 @@ class ToggleParser(LineParser):
|
|
|
73
54
|
toggle_content = CreateToggleData(rich_text=rich_text, color=BlockColor.DEFAULT, children=[])
|
|
74
55
|
return CreateToggleBlock(toggle=toggle_content)
|
|
75
56
|
|
|
76
|
-
async def
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
if self._is_nested_in_other_parent_context(context):
|
|
81
|
-
self._assign_to_parent_context(context, toggle_context)
|
|
82
|
-
else:
|
|
83
|
-
context.result_blocks.append(toggle_context.block)
|
|
84
|
-
|
|
85
|
-
def _is_nested_in_other_parent_context(self, context: BlockParsingContext) -> bool:
|
|
86
|
-
return context.parent_stack
|
|
57
|
+
async def _process_nested_children(self, block: CreateToggleBlock, context: BlockParsingContext) -> None:
|
|
58
|
+
parent_indent_level = context.get_line_indentation_level()
|
|
59
|
+
child_lines = context.collect_indented_child_lines(parent_indent_level)
|
|
87
60
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
parent_context.add_child_block(toggle_context.block)
|
|
91
|
-
|
|
92
|
-
async def _assign_toggle_children_if_any(
|
|
93
|
-
self, toggle_context: ParentBlockContext, context: BlockParsingContext
|
|
94
|
-
) -> None:
|
|
95
|
-
all_children = []
|
|
96
|
-
|
|
97
|
-
# Process text lines
|
|
98
|
-
if toggle_context.child_lines:
|
|
99
|
-
children_text = "\n".join(toggle_context.child_lines)
|
|
100
|
-
text_blocks = await self._parse_nested_content(children_text, context)
|
|
101
|
-
all_children.extend(text_blocks)
|
|
102
|
-
|
|
103
|
-
if toggle_context.child_blocks:
|
|
104
|
-
all_children.extend(toggle_context.child_blocks)
|
|
105
|
-
|
|
106
|
-
toggle_context.block.toggle.children = all_children
|
|
107
|
-
|
|
108
|
-
def _is_toggle_content(self, context: BlockParsingContext) -> bool:
|
|
109
|
-
if not context.parent_stack:
|
|
110
|
-
return False
|
|
111
|
-
|
|
112
|
-
current_parent = context.parent_stack[-1]
|
|
113
|
-
if not isinstance(current_parent.block, CreateToggleBlock):
|
|
114
|
-
return False
|
|
115
|
-
|
|
116
|
-
return not (
|
|
117
|
-
self._syntax.regex_pattern.match(context.line) or self._syntax.end_regex_pattern.match(context.line)
|
|
118
|
-
)
|
|
61
|
+
if not child_lines:
|
|
62
|
+
return
|
|
119
63
|
|
|
120
|
-
|
|
121
|
-
|
|
64
|
+
stripped_lines = context.strip_indentation_level(child_lines, levels=1)
|
|
65
|
+
child_markdown = "\n".join(stripped_lines)
|
|
122
66
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
return []
|
|
67
|
+
child_blocks = await context.parse_nested_markdown(child_markdown)
|
|
68
|
+
block.toggle.children = child_blocks
|
|
126
69
|
|
|
127
|
-
|
|
70
|
+
context.lines_consumed = len(child_lines)
|
|
@@ -9,7 +9,7 @@ from notionary.blocks.schemas import (
|
|
|
9
9
|
FileType,
|
|
10
10
|
)
|
|
11
11
|
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
12
|
-
from notionary.page.content.syntax
|
|
12
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class VideoParser(LineParser):
|
|
@@ -65,10 +65,12 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
65
65
|
self._truncate_rich_text_list(content.caption)
|
|
66
66
|
|
|
67
67
|
if hasattr(content, "children"):
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
self.
|
|
68
|
+
children = getattr(content, "children", None)
|
|
69
|
+
if children:
|
|
70
|
+
for child in children:
|
|
71
|
+
child_content = self._get_block_content(child)
|
|
72
|
+
if child_content:
|
|
73
|
+
self._truncate_content(child_content)
|
|
72
74
|
|
|
73
75
|
def _truncate_rich_text_list(self, rich_text_list: list[RichText]) -> None:
|
|
74
76
|
for rich_text in rich_text_list:
|
|
@@ -14,6 +14,7 @@ type _NestedBlockList = BlockCreatePayload | list["_NestedBlockList"]
|
|
|
14
14
|
|
|
15
15
|
class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
16
16
|
NOTION_MAX_LENGTH = 2000
|
|
17
|
+
ELLIPSIS = "..."
|
|
17
18
|
|
|
18
19
|
def __init__(self, max_text_length: int = NOTION_MAX_LENGTH) -> None:
|
|
19
20
|
self.max_text_length = max_text_length
|
|
@@ -58,36 +59,56 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
|
|
|
58
59
|
return None
|
|
59
60
|
|
|
60
61
|
def _truncate_content(self, content: object) -> None:
|
|
62
|
+
self._truncate_rich_text_fields(content)
|
|
63
|
+
self._truncate_children_recursively(content)
|
|
64
|
+
|
|
65
|
+
def _truncate_rich_text_fields(self, content: object) -> None:
|
|
61
66
|
if hasattr(content, "rich_text"):
|
|
62
67
|
self._truncate_rich_text_list(content.rich_text)
|
|
63
68
|
|
|
64
69
|
if hasattr(content, "caption"):
|
|
65
70
|
self._truncate_rich_text_list(content.caption)
|
|
66
71
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
def _truncate_children_recursively(self, content: object) -> None:
|
|
73
|
+
if not hasattr(content, "children"):
|
|
74
|
+
return
|
|
75
|
+
|
|
76
|
+
children = getattr(content, "children", None)
|
|
77
|
+
if not children:
|
|
78
|
+
return
|
|
79
|
+
|
|
80
|
+
for child in children:
|
|
81
|
+
self._truncate_child_content(child)
|
|
82
|
+
|
|
83
|
+
def _truncate_child_content(self, child: Any) -> None:
|
|
84
|
+
child_content = self._get_block_content(child)
|
|
85
|
+
if child_content:
|
|
86
|
+
self._truncate_content(child_content)
|
|
72
87
|
|
|
73
88
|
def _truncate_rich_text_list(self, rich_text_list: list[RichText]) -> None:
|
|
74
89
|
for rich_text in rich_text_list:
|
|
75
|
-
if
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
90
|
+
if self._should_truncate(rich_text):
|
|
91
|
+
self._truncate_single_rich_text(rich_text)
|
|
92
|
+
|
|
93
|
+
def _should_truncate(self, rich_text: RichText) -> bool:
|
|
94
|
+
if not self._is_text_type(rich_text):
|
|
95
|
+
return False
|
|
96
|
+
|
|
97
|
+
return len(rich_text.text.content) > self.max_text_length
|
|
98
|
+
|
|
99
|
+
def _truncate_single_rich_text(self, rich_text: RichText) -> None:
|
|
100
|
+
original_length = len(rich_text.text.content)
|
|
101
|
+
rich_text.text.content = self._create_truncated_text(rich_text.text.content)
|
|
102
|
+
|
|
103
|
+
self.logger.warning(
|
|
104
|
+
"Truncating text content from %d to %d characters",
|
|
105
|
+
original_length,
|
|
106
|
+
self.max_text_length,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def _create_truncated_text(self, content: str) -> str:
|
|
110
|
+
cutoff = self.max_text_length - len(self.ELLIPSIS)
|
|
111
|
+
return content[:cutoff] + self.ELLIPSIS
|
|
91
112
|
|
|
92
113
|
def _is_text_type(self, rich_text: RichText) -> bool:
|
|
93
|
-
return rich_text.type == RichTextType.TEXT and rich_text.text and rich_text.text.content
|
|
114
|
+
return rich_text.type == RichTextType.TEXT and rich_text.text is not None and rich_text.text.content
|