notionary 0.2.28__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. notionary/__init__.py +9 -2
  2. notionary/blocks/__init__.py +5 -0
  3. notionary/blocks/client.py +6 -4
  4. notionary/blocks/enums.py +28 -1
  5. notionary/blocks/rich_text/markdown_rich_text_converter.py +14 -0
  6. notionary/blocks/rich_text/models.py +14 -0
  7. notionary/blocks/rich_text/name_id_resolver/__init__.py +2 -0
  8. notionary/blocks/rich_text/name_id_resolver/data_source.py +32 -0
  9. notionary/blocks/rich_text/rich_text_markdown_converter.py +12 -0
  10. notionary/blocks/rich_text/rich_text_patterns.py +3 -0
  11. notionary/blocks/schemas.py +42 -10
  12. notionary/comments/__init__.py +5 -0
  13. notionary/comments/client.py +7 -10
  14. notionary/comments/factory.py +4 -6
  15. notionary/data_source/http/data_source_instance_client.py +14 -4
  16. notionary/data_source/properties/{models.py → schemas.py} +4 -8
  17. notionary/data_source/query/__init__.py +9 -0
  18. notionary/data_source/query/builder.py +38 -10
  19. notionary/data_source/query/schema.py +13 -10
  20. notionary/data_source/query/validator.py +11 -11
  21. notionary/data_source/schema/registry.py +104 -0
  22. notionary/data_source/schema/service.py +136 -0
  23. notionary/data_source/schemas.py +1 -1
  24. notionary/data_source/service.py +29 -103
  25. notionary/database/service.py +17 -60
  26. notionary/exceptions/__init__.py +5 -1
  27. notionary/exceptions/block_parsing.py +21 -0
  28. notionary/exceptions/search.py +24 -0
  29. notionary/http/client.py +9 -10
  30. notionary/http/models.py +5 -4
  31. notionary/page/content/factory.py +10 -3
  32. notionary/page/content/markdown/builder.py +76 -154
  33. notionary/page/content/markdown/nodes/__init__.py +0 -2
  34. notionary/page/content/markdown/nodes/audio.py +1 -1
  35. notionary/page/content/markdown/nodes/base.py +1 -1
  36. notionary/page/content/markdown/nodes/bookmark.py +1 -1
  37. notionary/page/content/markdown/nodes/breadcrumb.py +1 -1
  38. notionary/page/content/markdown/nodes/bulleted_list.py +31 -8
  39. notionary/page/content/markdown/nodes/callout.py +12 -10
  40. notionary/page/content/markdown/nodes/code.py +3 -5
  41. notionary/page/content/markdown/nodes/columns.py +39 -21
  42. notionary/page/content/markdown/nodes/container.py +64 -0
  43. notionary/page/content/markdown/nodes/divider.py +1 -1
  44. notionary/page/content/markdown/nodes/embed.py +1 -1
  45. notionary/page/content/markdown/nodes/equation.py +1 -1
  46. notionary/page/content/markdown/nodes/file.py +1 -1
  47. notionary/page/content/markdown/nodes/heading.py +26 -6
  48. notionary/page/content/markdown/nodes/image.py +1 -1
  49. notionary/page/content/markdown/nodes/mixins/__init__.py +5 -0
  50. notionary/page/content/markdown/nodes/mixins/caption.py +1 -1
  51. notionary/page/content/markdown/nodes/numbered_list.py +28 -5
  52. notionary/page/content/markdown/nodes/paragraph.py +1 -1
  53. notionary/page/content/markdown/nodes/pdf.py +1 -1
  54. notionary/page/content/markdown/nodes/quote.py +17 -5
  55. notionary/page/content/markdown/nodes/space.py +1 -1
  56. notionary/page/content/markdown/nodes/table.py +1 -1
  57. notionary/page/content/markdown/nodes/table_of_contents.py +1 -1
  58. notionary/page/content/markdown/nodes/todo.py +23 -7
  59. notionary/page/content/markdown/nodes/toggle.py +13 -14
  60. notionary/page/content/markdown/nodes/video.py +1 -1
  61. notionary/page/content/parser/context.py +98 -21
  62. notionary/page/content/parser/factory.py +1 -10
  63. notionary/page/content/parser/parsers/__init__.py +0 -2
  64. notionary/page/content/parser/parsers/audio.py +1 -1
  65. notionary/page/content/parser/parsers/base.py +1 -1
  66. notionary/page/content/parser/parsers/bookmark.py +1 -1
  67. notionary/page/content/parser/parsers/breadcrumb.py +1 -1
  68. notionary/page/content/parser/parsers/bulleted_list.py +52 -8
  69. notionary/page/content/parser/parsers/callout.py +55 -84
  70. notionary/page/content/parser/parsers/caption.py +1 -1
  71. notionary/page/content/parser/parsers/code.py +5 -5
  72. notionary/page/content/parser/parsers/column.py +23 -64
  73. notionary/page/content/parser/parsers/column_list.py +45 -45
  74. notionary/page/content/parser/parsers/divider.py +1 -1
  75. notionary/page/content/parser/parsers/embed.py +1 -1
  76. notionary/page/content/parser/parsers/equation.py +1 -1
  77. notionary/page/content/parser/parsers/file.py +1 -1
  78. notionary/page/content/parser/parsers/heading.py +65 -8
  79. notionary/page/content/parser/parsers/image.py +1 -1
  80. notionary/page/content/parser/parsers/numbered_list.py +52 -8
  81. notionary/page/content/parser/parsers/paragraph.py +3 -2
  82. notionary/page/content/parser/parsers/pdf.py +1 -1
  83. notionary/page/content/parser/parsers/quote.py +75 -15
  84. notionary/page/content/parser/parsers/space.py +14 -8
  85. notionary/page/content/parser/parsers/table.py +1 -1
  86. notionary/page/content/parser/parsers/table_of_contents.py +1 -1
  87. notionary/page/content/parser/parsers/todo.py +57 -19
  88. notionary/page/content/parser/parsers/toggle.py +17 -74
  89. notionary/page/content/parser/parsers/video.py +1 -1
  90. notionary/page/content/parser/post_processing/handlers/rich_text_length.py +6 -4
  91. notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +43 -22
  92. notionary/page/content/parser/pre_processsing/handlers/__init__.py +4 -0
  93. notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +108 -54
  94. notionary/page/content/parser/pre_processsing/handlers/indentation.py +86 -0
  95. notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +66 -0
  96. notionary/page/content/parser/pre_processsing/handlers/whitespace.py +14 -7
  97. notionary/page/content/parser/service.py +9 -0
  98. notionary/page/content/renderer/context.py +5 -2
  99. notionary/page/content/renderer/factory.py +2 -11
  100. notionary/page/content/renderer/post_processing/handlers/__init__.py +2 -2
  101. notionary/page/content/renderer/post_processing/handlers/numbered_list.py +156 -0
  102. notionary/page/content/renderer/renderers/__init__.py +0 -2
  103. notionary/page/content/renderer/renderers/base.py +1 -1
  104. notionary/page/content/renderer/renderers/bulleted_list.py +1 -1
  105. notionary/page/content/renderer/renderers/callout.py +6 -21
  106. notionary/page/content/renderer/renderers/captioned_block.py +1 -1
  107. notionary/page/content/renderer/renderers/column.py +28 -19
  108. notionary/page/content/renderer/renderers/column_list.py +24 -11
  109. notionary/page/content/renderer/renderers/heading.py +53 -27
  110. notionary/page/content/renderer/renderers/numbered_list.py +6 -5
  111. notionary/page/content/renderer/renderers/quote.py +1 -1
  112. notionary/page/content/renderer/renderers/todo.py +1 -1
  113. notionary/page/content/renderer/renderers/toggle.py +6 -7
  114. notionary/page/content/service.py +4 -1
  115. notionary/page/content/syntax/__init__.py +4 -0
  116. notionary/page/content/syntax/grammar.py +10 -0
  117. notionary/page/content/syntax/models.py +0 -2
  118. notionary/page/content/syntax/{service.py → registry.py} +31 -91
  119. notionary/page/properties/client.py +3 -3
  120. notionary/page/properties/models.py +3 -2
  121. notionary/page/properties/service.py +18 -3
  122. notionary/page/service.py +22 -80
  123. notionary/shared/entity/service.py +94 -36
  124. notionary/shared/models/cover.py +1 -1
  125. notionary/shared/typings.py +3 -0
  126. notionary/user/base.py +60 -11
  127. notionary/user/factory.py +0 -0
  128. notionary/utils/decorators.py +122 -0
  129. notionary/utils/fuzzy.py +18 -6
  130. notionary/utils/mixins/logging.py +38 -27
  131. notionary/utils/pagination.py +70 -16
  132. notionary/workspace/__init__.py +2 -1
  133. notionary/workspace/client.py +4 -2
  134. notionary/workspace/query/__init__.py +3 -0
  135. notionary/workspace/query/builder.py +25 -1
  136. notionary/workspace/query/models.py +12 -3
  137. notionary/workspace/query/service.py +57 -32
  138. notionary/workspace/service.py +31 -21
  139. {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/METADATA +35 -105
  140. notionary-0.3.1.dist-info/RECORD +211 -0
  141. notionary/page/content/markdown/nodes/toggleable_heading.py +0 -35
  142. notionary/page/content/parser/parsers/toggleable_heading.py +0 -150
  143. notionary/page/content/renderer/post_processing/handlers/numbered_list_placeholdere.py +0 -62
  144. notionary/page/content/renderer/renderers/toggleable_heading.py +0 -78
  145. notionary/utils/async_retry.py +0 -39
  146. notionary/utils/singleton.py +0 -13
  147. notionary-0.2.28.dist-info/RECORD +0 -200
  148. {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/WHEEL +0 -0
  149. {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -6,13 +6,13 @@ from notionary.blocks.rich_text.markdown_rich_text_converter import (
6
6
  from notionary.blocks.schemas import (
7
7
  BlockColor,
8
8
  CreateNumberedListItemBlock,
9
- NumberedListItemData,
9
+ CreateNumberedListItemData,
10
10
  )
11
11
  from notionary.page.content.parser.parsers.base import (
12
12
  BlockParsingContext,
13
13
  LineParser,
14
14
  )
15
- from notionary.page.content.syntax.service import SyntaxRegistry
15
+ from notionary.page.content.syntax import SyntaxRegistry
16
16
 
17
17
 
18
18
  class NumberedListParser(LineParser):
@@ -25,21 +25,65 @@ class NumberedListParser(LineParser):
25
25
  def _can_handle(self, context: BlockParsingContext) -> bool:
26
26
  if context.is_inside_parent_context():
27
27
  return False
28
- return self._syntax.regex_pattern.match(context.line) is not None
28
+ return self._is_numbered_list_line(context.line)
29
+
30
+ def _is_numbered_list_line(self, line: str) -> bool:
31
+ return self._syntax.regex_pattern.match(line) is not None
29
32
 
30
33
  @override
31
34
  async def _process(self, context: BlockParsingContext) -> None:
32
35
  block = await self._create_numbered_list_block(context.line)
33
- if block:
34
- context.result_blocks.append(block)
36
+ if not block:
37
+ return
38
+
39
+ await self._process_nested_children(block, context)
40
+ context.result_blocks.append(block)
41
+
42
+ async def _process_nested_children(self, block: CreateNumberedListItemBlock, context: BlockParsingContext) -> None:
43
+ child_lines = self._collect_child_lines(context)
44
+ if not child_lines:
45
+ return
46
+
47
+ child_blocks = await self._parse_child_blocks(child_lines, context)
48
+ if child_blocks:
49
+ block.numbered_list_item.children = child_blocks
50
+
51
+ context.lines_consumed = len(child_lines)
52
+
53
+ def _collect_child_lines(self, context: BlockParsingContext) -> list[str]:
54
+ parent_indent_level = context.get_line_indentation_level()
55
+ return context.collect_indented_child_lines(parent_indent_level)
56
+
57
+ async def _parse_child_blocks(
58
+ self, child_lines: list[str], context: BlockParsingContext
59
+ ) -> list[CreateNumberedListItemBlock]:
60
+ stripped_lines = self._remove_parent_indentation(child_lines, context)
61
+ children_text = self._convert_lines_to_text(stripped_lines)
62
+ return await context.parse_nested_markdown(children_text)
63
+
64
+ def _remove_parent_indentation(self, lines: list[str], context: BlockParsingContext) -> list[str]:
65
+ return context.strip_indentation_level(lines, levels=1)
66
+
67
+ def _convert_lines_to_text(self, lines: list[str]) -> str:
68
+ return "\n".join(lines)
35
69
 
36
70
  async def _create_numbered_list_block(self, text: str) -> CreateNumberedListItemBlock | None:
71
+ content = self._extract_list_content(text)
72
+ if content is None:
73
+ return None
74
+
75
+ rich_text = await self._convert_to_rich_text(content)
76
+ return self._build_block(rich_text)
77
+
78
+ def _extract_list_content(self, text: str) -> str | None:
37
79
  match = self._syntax.regex_pattern.match(text)
38
80
  if not match:
39
81
  return None
82
+ return match.group(3)
40
83
 
41
- content = match.group(3)
42
- rich_text = await self._rich_text_converter.to_rich_text(content)
84
+ async def _convert_to_rich_text(self, content: str):
85
+ return await self._rich_text_converter.to_rich_text(content)
43
86
 
44
- numbered_list_content = NumberedListItemData(rich_text=rich_text, color=BlockColor.DEFAULT)
87
+ def _build_block(self, rich_text) -> CreateNumberedListItemBlock:
88
+ numbered_list_content = CreateNumberedListItemData(rich_text=rich_text, color=BlockColor.DEFAULT)
45
89
  return CreateNumberedListItemBlock(numbered_list_item=numbered_list_content)
@@ -1,9 +1,10 @@
1
1
  from typing import override
2
2
 
3
+ from notionary.blocks.enums import BlockColor
3
4
  from notionary.blocks.rich_text.markdown_rich_text_converter import (
4
5
  MarkdownRichTextConverter,
5
6
  )
6
- from notionary.blocks.schemas import BlockColor, CreateParagraphBlock, ParagraphData
7
+ from notionary.blocks.schemas import CreateParagraphBlock, CreateParagraphData
7
8
  from notionary.page.content.parser.parsers.base import (
8
9
  BlockParsingContext,
9
10
  LineParser,
@@ -32,5 +33,5 @@ class ParagraphParser(LineParser):
32
33
  return None
33
34
 
34
35
  rich_text = await self._rich_text_converter.to_rich_text(text)
35
- paragraph_content = ParagraphData(rich_text=rich_text, color=BlockColor.DEFAULT)
36
+ paragraph_content = CreateParagraphData(rich_text=rich_text, color=BlockColor.DEFAULT)
36
37
  return CreateParagraphBlock(paragraph=paragraph_content)
@@ -9,7 +9,7 @@ from notionary.blocks.schemas import (
9
9
  FileType,
10
10
  )
11
11
  from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
12
- from notionary.page.content.syntax.service import SyntaxRegistry
12
+ from notionary.page.content.syntax import SyntaxRegistry
13
13
 
14
14
 
15
15
  class PdfParser(LineParser):
@@ -6,7 +6,7 @@ from notionary.page.content.parser.parsers.base import (
6
6
  BlockParsingContext,
7
7
  LineParser,
8
8
  )
9
- from notionary.page.content.syntax.service import SyntaxRegistry
9
+ from notionary.page.content.syntax import SyntaxRegistry
10
10
 
11
11
 
12
12
  class QuoteParser(LineParser):
@@ -21,15 +21,22 @@ class QuoteParser(LineParser):
21
21
  return False
22
22
  return self._is_quote(context.line)
23
23
 
24
+ def _is_quote(self, line: str) -> bool:
25
+ return self._syntax.regex_pattern.match(line) is not None
26
+
24
27
  @override
25
28
  async def _process(self, context: BlockParsingContext) -> None:
26
29
  quote_lines = self._collect_quote_lines(context)
27
- lines_consumed = len(quote_lines)
28
30
 
29
31
  block = await self._create_quote_block(quote_lines)
30
- if block:
31
- context.result_blocks.append(block)
32
- context.lines_consumed = lines_consumed
32
+ if not block:
33
+ return
34
+
35
+ # Lines consumed: all quote lines minus the current line (which is already being processed)
36
+ context.lines_consumed = len(quote_lines) - 1
37
+
38
+ await self._process_nested_children(block, context, quote_lines)
39
+ context.result_blocks.append(block)
33
40
 
34
41
  def _collect_quote_lines(self, context: BlockParsingContext) -> list[str]:
35
42
  quote_lines = [context.line]
@@ -39,27 +46,80 @@ class QuoteParser(LineParser):
39
46
  quote_lines.append(line)
40
47
  return quote_lines
41
48
 
42
- def _is_quote(self, line: str) -> bool:
43
- return self._syntax.regex_pattern.match(line) is not None
49
+ async def _process_nested_children(
50
+ self, block: CreateQuoteBlock, context: BlockParsingContext, quote_lines: list[str]
51
+ ) -> None:
52
+ # Calculate indent level after all quote lines
53
+ last_quote_line_index = len(quote_lines) - 1
54
+ child_lines = self._collect_child_lines_after_quote(context, last_quote_line_index)
55
+
56
+ if not child_lines:
57
+ return
58
+
59
+ child_blocks = await self._parse_child_blocks(child_lines, context)
60
+ if child_blocks:
61
+ block.quote.children = child_blocks
62
+
63
+ context.lines_consumed += len(child_lines)
64
+
65
+ def _collect_child_lines_after_quote(self, context: BlockParsingContext, last_quote_index: int) -> list[str]:
66
+ """Collect indented children after the quote block."""
67
+ parent_indent_level = context.get_line_indentation_level()
68
+ remaining_lines = context.get_remaining_lines()
69
+
70
+ # Skip the quote lines we already processed
71
+ lines_after_quote = remaining_lines[last_quote_index:]
72
+
73
+ child_lines = []
74
+ expected_child_indent = parent_indent_level + 1
75
+
76
+ for line in lines_after_quote:
77
+ if not line.strip():
78
+ child_lines.append(line)
79
+ continue
80
+
81
+ line_indent = context.get_line_indentation_level(line)
82
+ if line_indent >= expected_child_indent:
83
+ child_lines.append(line)
84
+ else:
85
+ break
86
+
87
+ return child_lines
88
+
89
+ async def _parse_child_blocks(self, child_lines: list[str], context: BlockParsingContext) -> list[CreateQuoteBlock]:
90
+ stripped_lines = self._remove_parent_indentation(child_lines, context)
91
+ children_text = self._convert_lines_to_text(stripped_lines)
92
+ return await context.parse_nested_markdown(children_text)
93
+
94
+ def _remove_parent_indentation(self, lines: list[str], context: BlockParsingContext) -> list[str]:
95
+ return context.strip_indentation_level(lines, levels=1)
96
+
97
+ def _convert_lines_to_text(self, lines: list[str]) -> str:
98
+ return "\n".join(lines)
44
99
 
45
100
  async def _create_quote_block(self, quote_lines: list[str]) -> CreateQuoteBlock | None:
46
- if not quote_lines:
101
+ contents = self._extract_quote_contents(quote_lines)
102
+ if not contents:
47
103
  return None
48
104
 
105
+ content = self._join_contents_for_multiline_quote(contents)
106
+ rich_text = await self._convert_to_rich_text(content)
107
+ return self._build_block(rich_text)
108
+
109
+ def _extract_quote_contents(self, quote_lines: list[str]) -> list[str]:
49
110
  contents = []
50
111
  for line in quote_lines:
51
112
  match = self._syntax.regex_pattern.match(line)
52
113
  if match:
53
114
  contents.append(match.group(1).strip())
115
+ return contents
54
116
 
55
- if not contents:
56
- return None
117
+ def _join_contents_for_multiline_quote(self, contents: list[str]) -> str:
118
+ return "\n".join(contents)
57
119
 
58
- content = self._join_contents_for_multiline_quote(contents)
120
+ async def _convert_to_rich_text(self, content: str):
121
+ return await self._rich_text_converter.to_rich_text(content)
59
122
 
60
- rich_text = await self._rich_text_converter.to_rich_text(content)
123
+ def _build_block(self, rich_text) -> CreateQuoteBlock:
61
124
  quote_data = CreateQuoteData(rich_text=rich_text, color=BlockColor.DEFAULT)
62
125
  return CreateQuoteBlock(quote=quote_data)
63
-
64
- def _join_contents_for_multiline_quote(self, contents: list[str]) -> str:
65
- return "\n".join(contents)
@@ -1,19 +1,15 @@
1
1
  from typing import override
2
2
 
3
- from notionary.blocks.schemas import BlockColor, CreateParagraphBlock, ParagraphData
3
+ from notionary.blocks.enums import BlockColor
4
+ from notionary.blocks.schemas import CreateParagraphBlock, CreateParagraphData
4
5
  from notionary.page.content.parser.parsers.base import (
5
6
  BlockParsingContext,
6
7
  LineParser,
7
8
  )
8
- from notionary.page.content.syntax.service import SyntaxRegistry
9
+ from notionary.page.content.syntax import SyntaxRegistry
9
10
 
10
11
 
11
12
  class SpaceParser(LineParser):
12
- """
13
- Parser for [space] markers that create empty paragraph blocks.
14
- Uses SyntaxRegistry for centralized syntax definition.
15
- """
16
-
17
13
  def __init__(self, syntax_registry: SyntaxRegistry) -> None:
18
14
  super().__init__(syntax_registry)
19
15
  self._syntax = syntax_registry.get_space_syntax()
@@ -22,8 +18,18 @@ class SpaceParser(LineParser):
22
18
  def _can_handle(self, context: BlockParsingContext) -> bool:
23
19
  if context.is_inside_parent_context():
24
20
  return False
21
+
22
+ if self._is_explicit_space_marker(context):
23
+ return True
24
+
25
+ return self._is_second_consecutive_empty_line(context)
26
+
27
+ def _is_explicit_space_marker(self, context: BlockParsingContext) -> bool:
25
28
  return self._syntax.regex_pattern.match(context.line.strip()) is not None
26
29
 
30
+ def _is_second_consecutive_empty_line(self, context: BlockParsingContext) -> bool:
31
+ return context.line.strip() == "" and context.is_previous_line_empty
32
+
27
33
  @override
28
34
  async def _process(self, context: BlockParsingContext) -> None:
29
35
  block = self._create_space_block()
@@ -31,5 +37,5 @@ class SpaceParser(LineParser):
31
37
  context.result_blocks.append(block)
32
38
 
33
39
  def _create_space_block(self) -> CreateParagraphBlock:
34
- paragraph_data = ParagraphData(rich_text=[], color=BlockColor.DEFAULT)
40
+ paragraph_data = CreateParagraphData(rich_text=[], color=BlockColor.DEFAULT)
35
41
  return CreateParagraphBlock(paragraph=paragraph_data)
@@ -4,7 +4,7 @@ from notionary.blocks.rich_text.markdown_rich_text_converter import MarkdownRich
4
4
  from notionary.blocks.rich_text.models import RichText
5
5
  from notionary.blocks.schemas import CreateTableBlock, CreateTableData, CreateTableRowBlock, TableRowData
6
6
  from notionary.page.content.parser.parsers import BlockParsingContext, LineParser
7
- from notionary.page.content.syntax.service import SyntaxRegistry
7
+ from notionary.page.content.syntax import SyntaxRegistry
8
8
 
9
9
 
10
10
  class TableParser(LineParser):
@@ -5,7 +5,7 @@ from notionary.page.content.parser.parsers.base import (
5
5
  BlockParsingContext,
6
6
  LineParser,
7
7
  )
8
- from notionary.page.content.syntax.service import SyntaxRegistry
8
+ from notionary.page.content.syntax import SyntaxRegistry
9
9
 
10
10
 
11
11
  class TableOfContentsParser(LineParser):
@@ -1,16 +1,14 @@
1
- """Parser for todo/checkbox blocks."""
2
-
3
1
  from typing import override
4
2
 
5
3
  from notionary.blocks.rich_text.markdown_rich_text_converter import (
6
4
  MarkdownRichTextConverter,
7
5
  )
8
- from notionary.blocks.schemas import BlockColor, CreateToDoBlock, ToDoData
6
+ from notionary.blocks.schemas import BlockColor, CreateToDoBlock, CreateToDoData
9
7
  from notionary.page.content.parser.parsers.base import (
10
8
  BlockParsingContext,
11
9
  LineParser,
12
10
  )
13
- from notionary.page.content.syntax.service import SyntaxRegistry
11
+ from notionary.page.content.syntax import SyntaxRegistry
14
12
 
15
13
 
16
14
  class TodoParser(LineParser):
@@ -24,33 +22,73 @@ class TodoParser(LineParser):
24
22
  def _can_handle(self, context: BlockParsingContext) -> bool:
25
23
  if context.is_inside_parent_context():
26
24
  return False
25
+ return self._is_todo_line(context.line)
27
26
 
27
+ def _is_todo_line(self, line: str) -> bool:
28
28
  return (
29
- self._syntax.regex_pattern.match(context.line) is not None
30
- or self._syntax_done.regex_pattern.match(context.line) is not None
29
+ self._syntax.regex_pattern.match(line) is not None
30
+ or self._syntax_done.regex_pattern.match(line) is not None
31
31
  )
32
32
 
33
33
  @override
34
34
  async def _process(self, context: BlockParsingContext) -> None:
35
35
  block = await self._create_todo_block(context.line)
36
- if block:
37
- context.result_blocks.append(block)
36
+ if not block:
37
+ return
38
+
39
+ await self._process_nested_children(block, context)
40
+ context.result_blocks.append(block)
41
+
42
+ async def _process_nested_children(self, block: CreateToDoBlock, context: BlockParsingContext) -> None:
43
+ child_lines = self._collect_child_lines(context)
44
+ if not child_lines:
45
+ return
46
+
47
+ child_blocks = await self._parse_child_blocks(child_lines, context)
48
+ if child_blocks:
49
+ block.to_do.children = child_blocks
50
+
51
+ context.lines_consumed = len(child_lines)
52
+
53
+ def _collect_child_lines(self, context: BlockParsingContext) -> list[str]:
54
+ parent_indent_level = context.get_line_indentation_level()
55
+ return context.collect_indented_child_lines(parent_indent_level)
56
+
57
+ async def _parse_child_blocks(self, child_lines: list[str], context: BlockParsingContext) -> list[CreateToDoBlock]:
58
+ stripped_lines = self._remove_parent_indentation(child_lines, context)
59
+ children_text = self._convert_lines_to_text(stripped_lines)
60
+ return await context.parse_nested_markdown(children_text)
61
+
62
+ def _remove_parent_indentation(self, lines: list[str], context: BlockParsingContext) -> list[str]:
63
+ return context.strip_indentation_level(lines, levels=1)
64
+
65
+ def _convert_lines_to_text(self, lines: list[str]) -> str:
66
+ return "\n".join(lines)
38
67
 
39
68
  async def _create_todo_block(self, text: str) -> CreateToDoBlock | None:
40
- done_match = self._syntax_done.regex_pattern.match(text)
41
- todo_match = None if done_match else self._syntax.regex_pattern.match(text)
69
+ content, checked = self._extract_todo_content(text)
70
+ if content is None:
71
+ return None
42
72
 
73
+ rich_text = await self._convert_to_rich_text(content)
74
+ return self._build_block(rich_text, checked)
75
+
76
+ def _extract_todo_content(self, text: str) -> tuple[str | None, bool]:
77
+ done_match = self._syntax_done.regex_pattern.match(text)
43
78
  if done_match:
44
- content = done_match.group(1)
45
- checked = True
46
- elif todo_match:
47
- content = todo_match.group(1)
48
- checked = False
49
- else:
50
- return None
79
+ return done_match.group(1), True
80
+
81
+ todo_match = self._syntax.regex_pattern.match(text)
82
+ if todo_match:
83
+ return todo_match.group(1), False
84
+
85
+ return None, False
86
+
87
+ async def _convert_to_rich_text(self, content: str):
88
+ return await self._rich_text_converter.to_rich_text(content)
51
89
 
52
- rich_text = await self._rich_text_converter.to_rich_text(content)
53
- todo_content = ToDoData(
90
+ def _build_block(self, rich_text, checked: bool) -> CreateToDoBlock:
91
+ todo_content = CreateToDoData(
54
92
  rich_text=rich_text,
55
93
  checked=checked,
56
94
  color=BlockColor.DEFAULT,
@@ -5,9 +5,8 @@ from notionary.blocks.schemas import BlockColor, CreateToggleBlock, CreateToggle
5
5
  from notionary.page.content.parser.parsers import (
6
6
  BlockParsingContext,
7
7
  LineParser,
8
- ParentBlockContext,
9
8
  )
10
- from notionary.page.content.syntax.service import SyntaxRegistry
9
+ from notionary.page.content.syntax import SyntaxRegistry
11
10
 
12
11
 
13
12
  class ToggleParser(LineParser):
@@ -19,18 +18,12 @@ class ToggleParser(LineParser):
19
18
 
20
19
  @override
21
20
  def _can_handle(self, context: BlockParsingContext) -> bool:
22
- return self._is_toggle_start(context) or self._is_toggle_end(context) or self._is_toggle_content(context)
21
+ return self._is_toggle_start(context)
23
22
 
24
23
  @override
25
24
  async def _process(self, context: BlockParsingContext) -> None:
26
25
  if self._is_toggle_start(context):
27
- await self._start_toggle(context)
28
-
29
- if self._is_toggle_end(context):
30
- await self._finalize_toggle(context)
31
-
32
- if self._is_toggle_content(context):
33
- self._add_toggle_content(context)
26
+ await self._process_toggle(context)
34
27
 
35
28
  def _is_toggle_start(self, context: BlockParsingContext) -> bool:
36
29
  if not self._syntax.regex_pattern.match(context.line):
@@ -42,26 +35,14 @@ class ToggleParser(LineParser):
42
35
  def is_heading_start(self, line: str) -> bool:
43
36
  return self._heading_syntax.regex_pattern.match(line) is not None
44
37
 
45
- def _is_toggle_end(self, context: BlockParsingContext) -> bool:
46
- if not self._syntax.end_regex_pattern.match(context.line):
47
- return False
48
-
49
- if not context.parent_stack:
50
- return False
51
-
52
- current_parent = context.parent_stack[-1]
53
- return isinstance(current_parent.block, CreateToggleBlock)
54
-
55
- async def _start_toggle(self, context: BlockParsingContext) -> None:
38
+ async def _process_toggle(self, context: BlockParsingContext) -> None:
56
39
  block = await self._create_toggle_block(context.line)
57
40
  if not block:
58
41
  return
59
42
 
60
- parent_context = ParentBlockContext(
61
- block=block,
62
- child_lines=[],
63
- )
64
- context.parent_stack.append(parent_context)
43
+ await self._process_nested_children(block, context)
44
+
45
+ context.result_blocks.append(block)
65
46
 
66
47
  async def _create_toggle_block(self, line: str) -> CreateToggleBlock | None:
67
48
  if not (match := self._syntax.regex_pattern.match(line)):
@@ -73,55 +54,17 @@ class ToggleParser(LineParser):
73
54
  toggle_content = CreateToggleData(rich_text=rich_text, color=BlockColor.DEFAULT, children=[])
74
55
  return CreateToggleBlock(toggle=toggle_content)
75
56
 
76
- async def _finalize_toggle(self, context: BlockParsingContext) -> None:
77
- toggle_context = context.parent_stack.pop()
78
- await self._assign_toggle_children_if_any(toggle_context, context)
79
-
80
- if self._is_nested_in_other_parent_context(context):
81
- self._assign_to_parent_context(context, toggle_context)
82
- else:
83
- context.result_blocks.append(toggle_context.block)
84
-
85
- def _is_nested_in_other_parent_context(self, context: BlockParsingContext) -> bool:
86
- return context.parent_stack
57
+ async def _process_nested_children(self, block: CreateToggleBlock, context: BlockParsingContext) -> None:
58
+ parent_indent_level = context.get_line_indentation_level()
59
+ child_lines = context.collect_indented_child_lines(parent_indent_level)
87
60
 
88
- def _assign_to_parent_context(self, context: BlockParsingContext, toggle_context: ParentBlockContext) -> None:
89
- parent_context = context.parent_stack[-1]
90
- parent_context.add_child_block(toggle_context.block)
91
-
92
- async def _assign_toggle_children_if_any(
93
- self, toggle_context: ParentBlockContext, context: BlockParsingContext
94
- ) -> None:
95
- all_children = []
96
-
97
- # Process text lines
98
- if toggle_context.child_lines:
99
- children_text = "\n".join(toggle_context.child_lines)
100
- text_blocks = await self._parse_nested_content(children_text, context)
101
- all_children.extend(text_blocks)
102
-
103
- if toggle_context.child_blocks:
104
- all_children.extend(toggle_context.child_blocks)
105
-
106
- toggle_context.block.toggle.children = all_children
107
-
108
- def _is_toggle_content(self, context: BlockParsingContext) -> bool:
109
- if not context.parent_stack:
110
- return False
111
-
112
- current_parent = context.parent_stack[-1]
113
- if not isinstance(current_parent.block, CreateToggleBlock):
114
- return False
115
-
116
- return not (
117
- self._syntax.regex_pattern.match(context.line) or self._syntax.end_regex_pattern.match(context.line)
118
- )
61
+ if not child_lines:
62
+ return
119
63
 
120
- def _add_toggle_content(self, context: BlockParsingContext) -> None:
121
- context.parent_stack[-1].add_child_line(context.line)
64
+ stripped_lines = context.strip_indentation_level(child_lines, levels=1)
65
+ child_markdown = "\n".join(stripped_lines)
122
66
 
123
- async def _parse_nested_content(self, text: str, context: BlockParsingContext) -> list:
124
- if not text.strip():
125
- return []
67
+ child_blocks = await context.parse_nested_markdown(child_markdown)
68
+ block.toggle.children = child_blocks
126
69
 
127
- return await context.parse_nested_content(text)
70
+ context.lines_consumed = len(child_lines)
@@ -9,7 +9,7 @@ from notionary.blocks.schemas import (
9
9
  FileType,
10
10
  )
11
11
  from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
12
- from notionary.page.content.syntax.service import SyntaxRegistry
12
+ from notionary.page.content.syntax import SyntaxRegistry
13
13
 
14
14
 
15
15
  class VideoParser(LineParser):
@@ -65,10 +65,12 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
65
65
  self._truncate_rich_text_list(content.caption)
66
66
 
67
67
  if hasattr(content, "children"):
68
- for child in content.children:
69
- child_content = self._get_block_content(child)
70
- if child_content:
71
- self._truncate_content(child_content)
68
+ children = getattr(content, "children", None)
69
+ if children:
70
+ for child in children:
71
+ child_content = self._get_block_content(child)
72
+ if child_content:
73
+ self._truncate_content(child_content)
72
74
 
73
75
  def _truncate_rich_text_list(self, rich_text_list: list[RichText]) -> None:
74
76
  for rich_text in rich_text_list:
@@ -14,6 +14,7 @@ type _NestedBlockList = BlockCreatePayload | list["_NestedBlockList"]
14
14
 
15
15
  class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
16
16
  NOTION_MAX_LENGTH = 2000
17
+ ELLIPSIS = "..."
17
18
 
18
19
  def __init__(self, max_text_length: int = NOTION_MAX_LENGTH) -> None:
19
20
  self.max_text_length = max_text_length
@@ -58,36 +59,56 @@ class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
58
59
  return None
59
60
 
60
61
  def _truncate_content(self, content: object) -> None:
62
+ self._truncate_rich_text_fields(content)
63
+ self._truncate_children_recursively(content)
64
+
65
+ def _truncate_rich_text_fields(self, content: object) -> None:
61
66
  if hasattr(content, "rich_text"):
62
67
  self._truncate_rich_text_list(content.rich_text)
63
68
 
64
69
  if hasattr(content, "caption"):
65
70
  self._truncate_rich_text_list(content.caption)
66
71
 
67
- if hasattr(content, "children"):
68
- for child in content.children:
69
- child_content = self._get_block_content(child)
70
- if child_content:
71
- self._truncate_content(child_content)
72
+ def _truncate_children_recursively(self, content: object) -> None:
73
+ if not hasattr(content, "children"):
74
+ return
75
+
76
+ children = getattr(content, "children", None)
77
+ if not children:
78
+ return
79
+
80
+ for child in children:
81
+ self._truncate_child_content(child)
82
+
83
+ def _truncate_child_content(self, child: Any) -> None:
84
+ child_content = self._get_block_content(child)
85
+ if child_content:
86
+ self._truncate_content(child_content)
72
87
 
73
88
  def _truncate_rich_text_list(self, rich_text_list: list[RichText]) -> None:
74
89
  for rich_text in rich_text_list:
75
- if not self._is_text_type(rich_text):
76
- continue
77
-
78
- content = rich_text.text.content
79
- if len(content) > self.max_text_length:
80
- self.logger.warning(
81
- "Truncating text content from %d to %d characters",
82
- len(content),
83
- self.max_text_length,
84
- )
85
- truncated_content = self._create_truncated_text_with_ellipsis(content)
86
- rich_text.text.content = truncated_content
87
-
88
- def _create_truncated_text_with_ellipsis(self, content: str) -> str:
89
- cutoff = self.max_text_length - 3
90
- return content[:cutoff] + "..."
90
+ if self._should_truncate(rich_text):
91
+ self._truncate_single_rich_text(rich_text)
92
+
93
+ def _should_truncate(self, rich_text: RichText) -> bool:
94
+ if not self._is_text_type(rich_text):
95
+ return False
96
+
97
+ return len(rich_text.text.content) > self.max_text_length
98
+
99
+ def _truncate_single_rich_text(self, rich_text: RichText) -> None:
100
+ original_length = len(rich_text.text.content)
101
+ rich_text.text.content = self._create_truncated_text(rich_text.text.content)
102
+
103
+ self.logger.warning(
104
+ "Truncating text content from %d to %d characters",
105
+ original_length,
106
+ self.max_text_length,
107
+ )
108
+
109
+ def _create_truncated_text(self, content: str) -> str:
110
+ cutoff = self.max_text_length - len(self.ELLIPSIS)
111
+ return content[:cutoff] + self.ELLIPSIS
91
112
 
92
113
  def _is_text_type(self, rich_text: RichText) -> bool:
93
- return rich_text.type == RichTextType.TEXT and rich_text.text and rich_text.text.content
114
+ return rich_text.type == RichTextType.TEXT and rich_text.text is not None and rich_text.text.content