notionary 0.2.28__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. notionary/__init__.py +9 -2
  2. notionary/blocks/__init__.py +5 -0
  3. notionary/blocks/client.py +6 -4
  4. notionary/blocks/enums.py +28 -1
  5. notionary/blocks/rich_text/markdown_rich_text_converter.py +14 -0
  6. notionary/blocks/rich_text/models.py +14 -0
  7. notionary/blocks/rich_text/name_id_resolver/__init__.py +2 -0
  8. notionary/blocks/rich_text/name_id_resolver/data_source.py +32 -0
  9. notionary/blocks/rich_text/rich_text_markdown_converter.py +12 -0
  10. notionary/blocks/rich_text/rich_text_patterns.py +3 -0
  11. notionary/blocks/schemas.py +42 -10
  12. notionary/comments/__init__.py +5 -0
  13. notionary/comments/client.py +7 -10
  14. notionary/comments/factory.py +4 -6
  15. notionary/data_source/http/data_source_instance_client.py +14 -4
  16. notionary/data_source/properties/{models.py → schemas.py} +4 -8
  17. notionary/data_source/query/__init__.py +9 -0
  18. notionary/data_source/query/builder.py +38 -10
  19. notionary/data_source/query/schema.py +13 -10
  20. notionary/data_source/query/validator.py +11 -11
  21. notionary/data_source/schema/registry.py +104 -0
  22. notionary/data_source/schema/service.py +136 -0
  23. notionary/data_source/schemas.py +1 -1
  24. notionary/data_source/service.py +29 -103
  25. notionary/database/service.py +17 -60
  26. notionary/exceptions/__init__.py +5 -1
  27. notionary/exceptions/block_parsing.py +21 -0
  28. notionary/exceptions/search.py +24 -0
  29. notionary/http/client.py +9 -10
  30. notionary/http/models.py +5 -4
  31. notionary/page/content/factory.py +10 -3
  32. notionary/page/content/markdown/builder.py +76 -154
  33. notionary/page/content/markdown/nodes/__init__.py +0 -2
  34. notionary/page/content/markdown/nodes/audio.py +1 -1
  35. notionary/page/content/markdown/nodes/base.py +1 -1
  36. notionary/page/content/markdown/nodes/bookmark.py +1 -1
  37. notionary/page/content/markdown/nodes/breadcrumb.py +1 -1
  38. notionary/page/content/markdown/nodes/bulleted_list.py +31 -8
  39. notionary/page/content/markdown/nodes/callout.py +12 -10
  40. notionary/page/content/markdown/nodes/code.py +3 -5
  41. notionary/page/content/markdown/nodes/columns.py +39 -21
  42. notionary/page/content/markdown/nodes/container.py +64 -0
  43. notionary/page/content/markdown/nodes/divider.py +1 -1
  44. notionary/page/content/markdown/nodes/embed.py +1 -1
  45. notionary/page/content/markdown/nodes/equation.py +1 -1
  46. notionary/page/content/markdown/nodes/file.py +1 -1
  47. notionary/page/content/markdown/nodes/heading.py +26 -6
  48. notionary/page/content/markdown/nodes/image.py +1 -1
  49. notionary/page/content/markdown/nodes/mixins/__init__.py +5 -0
  50. notionary/page/content/markdown/nodes/mixins/caption.py +1 -1
  51. notionary/page/content/markdown/nodes/numbered_list.py +28 -5
  52. notionary/page/content/markdown/nodes/paragraph.py +1 -1
  53. notionary/page/content/markdown/nodes/pdf.py +1 -1
  54. notionary/page/content/markdown/nodes/quote.py +17 -5
  55. notionary/page/content/markdown/nodes/space.py +1 -1
  56. notionary/page/content/markdown/nodes/table.py +1 -1
  57. notionary/page/content/markdown/nodes/table_of_contents.py +1 -1
  58. notionary/page/content/markdown/nodes/todo.py +23 -7
  59. notionary/page/content/markdown/nodes/toggle.py +13 -14
  60. notionary/page/content/markdown/nodes/video.py +1 -1
  61. notionary/page/content/parser/context.py +98 -21
  62. notionary/page/content/parser/factory.py +1 -10
  63. notionary/page/content/parser/parsers/__init__.py +0 -2
  64. notionary/page/content/parser/parsers/audio.py +1 -1
  65. notionary/page/content/parser/parsers/base.py +1 -1
  66. notionary/page/content/parser/parsers/bookmark.py +1 -1
  67. notionary/page/content/parser/parsers/breadcrumb.py +1 -1
  68. notionary/page/content/parser/parsers/bulleted_list.py +52 -8
  69. notionary/page/content/parser/parsers/callout.py +55 -84
  70. notionary/page/content/parser/parsers/caption.py +1 -1
  71. notionary/page/content/parser/parsers/code.py +5 -5
  72. notionary/page/content/parser/parsers/column.py +23 -64
  73. notionary/page/content/parser/parsers/column_list.py +45 -45
  74. notionary/page/content/parser/parsers/divider.py +1 -1
  75. notionary/page/content/parser/parsers/embed.py +1 -1
  76. notionary/page/content/parser/parsers/equation.py +1 -1
  77. notionary/page/content/parser/parsers/file.py +1 -1
  78. notionary/page/content/parser/parsers/heading.py +65 -8
  79. notionary/page/content/parser/parsers/image.py +1 -1
  80. notionary/page/content/parser/parsers/numbered_list.py +52 -8
  81. notionary/page/content/parser/parsers/paragraph.py +3 -2
  82. notionary/page/content/parser/parsers/pdf.py +1 -1
  83. notionary/page/content/parser/parsers/quote.py +75 -15
  84. notionary/page/content/parser/parsers/space.py +14 -8
  85. notionary/page/content/parser/parsers/table.py +1 -1
  86. notionary/page/content/parser/parsers/table_of_contents.py +1 -1
  87. notionary/page/content/parser/parsers/todo.py +57 -19
  88. notionary/page/content/parser/parsers/toggle.py +17 -74
  89. notionary/page/content/parser/parsers/video.py +1 -1
  90. notionary/page/content/parser/post_processing/handlers/rich_text_length.py +6 -4
  91. notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +43 -22
  92. notionary/page/content/parser/pre_processsing/handlers/__init__.py +4 -0
  93. notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +108 -54
  94. notionary/page/content/parser/pre_processsing/handlers/indentation.py +86 -0
  95. notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +66 -0
  96. notionary/page/content/parser/pre_processsing/handlers/whitespace.py +14 -7
  97. notionary/page/content/parser/service.py +9 -0
  98. notionary/page/content/renderer/context.py +5 -2
  99. notionary/page/content/renderer/factory.py +2 -11
  100. notionary/page/content/renderer/post_processing/handlers/__init__.py +2 -2
  101. notionary/page/content/renderer/post_processing/handlers/numbered_list.py +156 -0
  102. notionary/page/content/renderer/renderers/__init__.py +0 -2
  103. notionary/page/content/renderer/renderers/base.py +1 -1
  104. notionary/page/content/renderer/renderers/bulleted_list.py +1 -1
  105. notionary/page/content/renderer/renderers/callout.py +6 -21
  106. notionary/page/content/renderer/renderers/captioned_block.py +1 -1
  107. notionary/page/content/renderer/renderers/column.py +28 -19
  108. notionary/page/content/renderer/renderers/column_list.py +24 -11
  109. notionary/page/content/renderer/renderers/heading.py +53 -27
  110. notionary/page/content/renderer/renderers/numbered_list.py +6 -5
  111. notionary/page/content/renderer/renderers/quote.py +1 -1
  112. notionary/page/content/renderer/renderers/todo.py +1 -1
  113. notionary/page/content/renderer/renderers/toggle.py +6 -7
  114. notionary/page/content/service.py +4 -1
  115. notionary/page/content/syntax/__init__.py +4 -0
  116. notionary/page/content/syntax/grammar.py +10 -0
  117. notionary/page/content/syntax/models.py +0 -2
  118. notionary/page/content/syntax/{service.py → registry.py} +31 -91
  119. notionary/page/properties/client.py +3 -3
  120. notionary/page/properties/models.py +3 -2
  121. notionary/page/properties/service.py +18 -3
  122. notionary/page/service.py +22 -80
  123. notionary/shared/entity/service.py +94 -36
  124. notionary/shared/models/cover.py +1 -1
  125. notionary/shared/typings.py +3 -0
  126. notionary/user/base.py +60 -11
  127. notionary/user/factory.py +0 -0
  128. notionary/utils/decorators.py +122 -0
  129. notionary/utils/fuzzy.py +18 -6
  130. notionary/utils/mixins/logging.py +38 -27
  131. notionary/utils/pagination.py +70 -16
  132. notionary/workspace/__init__.py +2 -1
  133. notionary/workspace/client.py +4 -2
  134. notionary/workspace/query/__init__.py +3 -0
  135. notionary/workspace/query/builder.py +25 -1
  136. notionary/workspace/query/models.py +12 -3
  137. notionary/workspace/query/service.py +57 -32
  138. notionary/workspace/service.py +31 -21
  139. {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/METADATA +35 -105
  140. notionary-0.3.1.dist-info/RECORD +211 -0
  141. notionary/page/content/markdown/nodes/toggleable_heading.py +0 -35
  142. notionary/page/content/parser/parsers/toggleable_heading.py +0 -150
  143. notionary/page/content/renderer/post_processing/handlers/numbered_list_placeholdere.py +0 -62
  144. notionary/page/content/renderer/renderers/toggleable_heading.py +0 -78
  145. notionary/utils/async_retry.py +0 -39
  146. notionary/utils/singleton.py +0 -13
  147. notionary-0.2.28.dist-info/RECORD +0 -200
  148. {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/WHEEL +0 -0
  149. {notionary-0.2.28.dist-info → notionary-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,13 @@
1
1
  from .column_syntax import ColumnSyntaxPreProcessor
2
+ from .indentation import IndentationNormalizer
2
3
  from .port import PreProcessor
4
+ from .video_syntax import VideoFormatPreProcessor
3
5
  from .whitespace import WhitespacePreProcessor
4
6
 
5
7
  __all__ = [
6
8
  "ColumnSyntaxPreProcessor",
9
+ "IndentationNormalizer",
7
10
  "PreProcessor",
11
+ "VideoFormatPreProcessor",
8
12
  "WhitespacePreProcessor",
9
13
  ]
@@ -3,78 +3,132 @@ from typing import override
3
3
 
4
4
  from notionary.exceptions.block_parsing import InsufficientColumnsError, InvalidColumnRatioSumError
5
5
  from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
6
- from notionary.page.content.syntax.service import SyntaxRegistry
6
+ from notionary.page.content.syntax import MarkdownGrammar, SyntaxRegistry
7
+ from notionary.utils.decorators import time_execution_sync
8
+ from notionary.utils.mixins.logging import LoggingMixin
7
9
 
8
- RATIO_TOLERANCE = 0.0001
9
10
 
11
+ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
12
+ _RATIO_TOLERANCE = 0.0001
13
+ _MINIMUM_COLUMNS = 2
10
14
 
11
- class ColumnSyntaxPreProcessor(PreProcessor):
12
- def __init__(self, syntax_registry: SyntaxRegistry | None = None) -> None:
15
+ def __init__(
16
+ self, syntax_registry: SyntaxRegistry | None = None, markdown_grammar: MarkdownGrammar | None = None
17
+ ) -> None:
18
+ super().__init__()
13
19
  self._syntax_registry = syntax_registry or SyntaxRegistry()
14
- self._column_list_syntax = self._syntax_registry.get_column_list_syntax()
15
- self._column_syntax = self._syntax_registry.get_column_syntax()
20
+ self._markdown_grammar = markdown_grammar or MarkdownGrammar()
21
+
22
+ self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
23
+ self._column_list_delimiter = self._syntax_registry.get_column_list_syntax().start_delimiter
24
+ self._column_delimiter = self._syntax_registry.get_column_syntax().start_delimiter
25
+ self._column_pattern = self._syntax_registry.get_column_syntax().regex_pattern
16
26
 
17
27
  @override
28
+ @time_execution_sync()
18
29
  def process(self, markdown_text: str) -> str:
19
- if not self._has_columns_blocks(markdown_text):
30
+ if not self._contains_column_lists(markdown_text):
20
31
  return markdown_text
21
32
 
22
- columns_blocks = self._extract_columns_blocks(markdown_text)
23
-
24
- for content in columns_blocks:
25
- column_matches = self._find_column_blocks(content)
26
- column_count = len(column_matches)
27
- self._validate_minimum_columns(column_count)
28
- ratios = self._extract_ratios(column_matches)
29
- self._validate_ratios(ratios, column_count)
33
+ self._validate_all_column_lists(markdown_text)
30
34
  return markdown_text
31
35
 
32
- def _has_columns_blocks(self, markdown_text: str) -> bool:
33
- return self._column_list_syntax.start_delimiter in markdown_text
36
+ def _contains_column_lists(self, markdown_text: str) -> bool:
37
+ return self._column_list_delimiter in markdown_text
38
+
39
+ def _validate_all_column_lists(self, markdown_text: str) -> None:
40
+ column_list_blocks = self._extract_column_list_blocks(markdown_text)
41
+
42
+ for block in column_list_blocks:
43
+ self._validate_column_list_block(block)
34
44
 
35
- def _extract_columns_blocks(self, markdown_text: str) -> list[str]:
36
- columns_blocks = []
45
+ def _extract_column_list_blocks(self, markdown_text: str) -> list[str]:
37
46
  lines = markdown_text.split("\n")
47
+ blocks = []
48
+
38
49
  for index, line in enumerate(lines):
39
- if line.strip() == self._column_list_syntax.start_delimiter:
40
- content = self._extract_block_content(lines, index + 1)
41
- if content is not None:
42
- columns_blocks.append(content)
43
- return columns_blocks
44
-
45
- def _extract_block_content(self, lines: list[str], start_index: int) -> str | None:
46
- depth = 1
47
- end_index = start_index
48
- block_start = self._column_list_syntax.start_delimiter.split()[0]
49
- while end_index < len(lines) and depth > 0:
50
- line = lines[end_index].strip()
51
- if line.startswith(f"{block_start} "):
52
- depth += 1
53
- elif line == self._column_list_syntax.end_delimiter:
54
- depth -= 1
55
- end_index += 1
56
- if depth == 0:
57
- return "\n".join(lines[start_index : end_index - 1])
58
- return None
59
-
60
- def _find_column_blocks(self, content: str) -> list[re.Match]:
61
- return list(self._column_syntax.regex_pattern.finditer(content))
62
-
63
- def _validate_minimum_columns(self, column_count: int) -> None:
64
- if column_count < 2:
50
+ if self._is_column_list_start(line):
51
+ block_content = self._extract_indented_block(lines, index + 1)
52
+ blocks.append(block_content)
53
+
54
+ return blocks
55
+
56
+ def _is_column_list_start(self, line: str) -> bool:
57
+ return line.strip() == self._column_list_delimiter
58
+
59
+ def _extract_indented_block(self, lines: list[str], start_index: int) -> str:
60
+ if start_index >= len(lines):
61
+ return ""
62
+
63
+ base_indentation = self._get_indentation_level(lines[start_index])
64
+ base_spaces = base_indentation * self._spaces_per_nesting_level
65
+ block_lines = []
66
+
67
+ for line in lines[start_index:]:
68
+ if self._is_empty_line(line):
69
+ block_lines.append(line)
70
+ continue
71
+
72
+ current_indentation = self._get_indentation_level(line)
73
+
74
+ if current_indentation < base_indentation:
75
+ break
76
+
77
+ block_lines.append(line[base_spaces:] if len(line) >= base_spaces else line)
78
+
79
+ return "\n".join(block_lines)
80
+
81
+ def _is_empty_line(self, line: str) -> bool:
82
+ return not line.strip()
83
+
84
+ def _get_indentation_level(self, line: str) -> int:
85
+ leading_spaces = len(line) - len(line.lstrip())
86
+ return leading_spaces // self._spaces_per_nesting_level
87
+
88
+ def _validate_column_list_block(self, block_content: str) -> None:
89
+ column_matches = self._find_all_columns(block_content)
90
+ column_count = len(column_matches)
91
+
92
+ self._validate_minimum_column_count(column_count)
93
+
94
+ ratios = self._extract_column_ratios(column_matches)
95
+ self._validate_ratio_sum(ratios, column_count)
96
+
97
+ def _find_all_columns(self, content: str) -> list[re.Match]:
98
+ return list(self._column_pattern.finditer(content))
99
+
100
+ def _validate_minimum_column_count(self, column_count: int) -> None:
101
+ if column_count < self._MINIMUM_COLUMNS:
102
+ self.logger.error(
103
+ f"Column list must contain at least {self._MINIMUM_COLUMNS} columns, found {column_count}"
104
+ )
65
105
  raise InsufficientColumnsError(column_count)
66
106
 
67
- def _extract_ratios(self, column_matches: list[re.Match]) -> list[float]:
107
+ def _extract_column_ratios(self, column_matches: list[re.Match]) -> list[float]:
68
108
  ratios = []
109
+
69
110
  for match in column_matches:
70
- ratio_str = match.group(1)
71
- if ratio_str and ratio_str != "1":
72
- ratios.append(float(ratio_str))
111
+ ratio_text = match.group(1)
112
+ if self._has_explicit_ratio(ratio_text):
113
+ ratios.append(float(ratio_text))
114
+
73
115
  return ratios
74
116
 
75
- def _validate_ratios(self, ratios: list[float], column_count: int) -> None:
76
- if not ratios or len(ratios) != column_count:
117
+ def _has_explicit_ratio(self, ratio_text: str | None) -> bool:
118
+ return ratio_text is not None and ratio_text != "1"
119
+
120
+ def _validate_ratio_sum(self, ratios: list[float], column_count: int) -> None:
121
+ if not self._should_validate_ratios(ratios, column_count):
77
122
  return
78
- total = sum(ratios)
79
- if abs(total - 1.0) > RATIO_TOLERANCE:
80
- raise InvalidColumnRatioSumError(total, RATIO_TOLERANCE)
123
+
124
+ total_ratio = sum(ratios)
125
+
126
+ if not self._is_ratio_sum_valid(total_ratio):
127
+ self.logger.error(f"Column ratios must sum to 1.0 (±{self._RATIO_TOLERANCE}), but sum to {total_ratio:.4f}")
128
+ raise InvalidColumnRatioSumError(total_ratio, self._RATIO_TOLERANCE)
129
+
130
+ def _should_validate_ratios(self, ratios: list[float], column_count: int) -> bool:
131
+ return len(ratios) > 0 and len(ratios) == column_count
132
+
133
+ def _is_ratio_sum_valid(self, total: float) -> bool:
134
+ return abs(total - 1.0) <= self._RATIO_TOLERANCE
@@ -0,0 +1,86 @@
1
+ import math
2
+ from typing import override
3
+
4
+ from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
5
+ from notionary.page.content.syntax import MarkdownGrammar, SyntaxRegistry
6
+ from notionary.utils.decorators import time_execution_sync
7
+ from notionary.utils.mixins.logging import LoggingMixin
8
+
9
+
10
+ class IndentationNormalizer(PreProcessor, LoggingMixin):
11
+ def __init__(
12
+ self, syntax_registry: SyntaxRegistry | None = None, markdown_grammar: MarkdownGrammar | None = None
13
+ ) -> None:
14
+ super().__init__()
15
+ self._syntax_registry = syntax_registry or SyntaxRegistry()
16
+ self._markdown_grammar = markdown_grammar or MarkdownGrammar()
17
+
18
+ self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
19
+ self._code_block_start_delimiter = self._syntax_registry.get_code_syntax().start_delimiter
20
+
21
+ @override
22
+ @time_execution_sync()
23
+ def process(self, markdown_text: str) -> str:
24
+ if self._is_empty(markdown_text):
25
+ return ""
26
+
27
+ normalized = self._normalize_to_markdown_indentation(markdown_text)
28
+
29
+ if normalized != markdown_text:
30
+ self.logger.warning(
31
+ "Corrected non-standard indentation. Check the result for formatting errors and use consistent indentation in the source."
32
+ )
33
+
34
+ return normalized
35
+
36
+ def _is_empty(self, text: str) -> bool:
37
+ return not text
38
+
39
+ def _normalize_to_markdown_indentation(self, markdown_text: str) -> str:
40
+ lines = markdown_text.split("\n")
41
+ processed_lines = []
42
+ inside_code_block = False
43
+
44
+ for line in lines:
45
+ if self._is_code_fence(line):
46
+ inside_code_block = not inside_code_block
47
+ processed_lines.append(line)
48
+ elif inside_code_block:
49
+ processed_lines.append(line)
50
+ else:
51
+ processed_lines.append(self._normalize_to_standard_indentation(line))
52
+
53
+ return "\n".join(processed_lines)
54
+
55
+ def _is_code_fence(self, line: str) -> bool:
56
+ return line.lstrip().startswith(self._code_block_start_delimiter)
57
+
58
+ def _normalize_to_standard_indentation(self, line: str) -> str:
59
+ if self._is_blank_line(line):
60
+ return ""
61
+
62
+ indentation_level = self._round_to_nearest_indentation_level(line)
63
+ content = self._extract_content(line)
64
+
65
+ return self._build_indented_line(indentation_level, content)
66
+
67
+ def _is_blank_line(self, line: str) -> bool:
68
+ return not line.strip()
69
+
70
+ def _round_to_nearest_indentation_level(self, line: str) -> int:
71
+ leading_spaces = self._count_leading_spaces(line)
72
+ return math.ceil(leading_spaces / self._spaces_per_nesting_level)
73
+
74
+ def _count_leading_spaces(self, line: str) -> int:
75
+ return len(line) - len(line.lstrip())
76
+
77
+ def _extract_content(self, line: str) -> str:
78
+ return line.lstrip()
79
+
80
+ def _build_indented_line(self, level: int, content: str) -> str:
81
+ standard_indent = self._create_standard_indent(level)
82
+ return standard_indent + content
83
+
84
+ def _create_standard_indent(self, level: int) -> str:
85
+ spaces = level * self._spaces_per_nesting_level
86
+ return " " * spaces
@@ -0,0 +1,66 @@
1
+ import re
2
+ from typing import override
3
+ from urllib.parse import urlparse
4
+
5
+ from notionary.blocks.enums import VideoFileType
6
+ from notionary.exceptions import UnsupportedVideoFormatError
7
+ from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
8
+ from notionary.page.content.syntax import SyntaxRegistry
9
+ from notionary.utils.decorators import time_execution_sync
10
+ from notionary.utils.mixins.logging import LoggingMixin
11
+
12
+
13
+ class VideoFormatPreProcessor(PreProcessor, LoggingMixin):
14
+ YOUTUBE_WATCH_PATTERN = re.compile(r"^https?://(?:www\.)?youtube\.com/watch\?.*v=[\w-]+", re.IGNORECASE)
15
+ YOUTUBE_EMBED_PATTERN = re.compile(r"^https?://(?:www\.)?youtube\.com/embed/[\w-]+", re.IGNORECASE)
16
+
17
+ def __init__(self, syntax_registry: SyntaxRegistry | None = None) -> None:
18
+ super().__init__()
19
+ self._syntax_registry = syntax_registry or SyntaxRegistry()
20
+ self._video_syntax = self._syntax_registry.get_video_syntax()
21
+
22
+ @override
23
+ @time_execution_sync()
24
+ def process(self, markdown_text: str) -> str:
25
+ lines = markdown_text.split("\n")
26
+ validated_lines = [self._validate_or_reject_line(line) for line in lines]
27
+ return "\n".join(validated_lines)
28
+
29
+ def _validate_or_reject_line(self, line: str) -> str:
30
+ if not self._contains_video_block(line):
31
+ return line
32
+
33
+ url = self._extract_url_from_video_block(line)
34
+
35
+ if self._is_supported_video_url(url):
36
+ return line
37
+
38
+ supported_formats = list(VideoFileType.get_all_extensions())
39
+ raise UnsupportedVideoFormatError(url, supported_formats)
40
+
41
+ def _contains_video_block(self, line: str) -> bool:
42
+ return self._video_syntax.regex_pattern.search(line) is not None
43
+
44
+ def _extract_url_from_video_block(self, line: str) -> str:
45
+ match = self._video_syntax.regex_pattern.search(line)
46
+ return match.group(1).strip() if match else ""
47
+
48
+ def _is_supported_video_url(self, url: str) -> bool:
49
+ return (
50
+ self._is_youtube_video(url)
51
+ or self._has_valid_video_extension(url)
52
+ or self._url_path_has_valid_extension(url)
53
+ )
54
+
55
+ def _is_youtube_video(self, url: str) -> bool:
56
+ return bool(self.YOUTUBE_WATCH_PATTERN.match(url) or self.YOUTUBE_EMBED_PATTERN.match(url))
57
+
58
+ def _has_valid_video_extension(self, url: str) -> bool:
59
+ return VideoFileType.is_valid_extension(url)
60
+
61
+ def _url_path_has_valid_extension(self, url: str) -> bool:
62
+ try:
63
+ parsed_url = urlparse(url)
64
+ return VideoFileType.is_valid_extension(parsed_url.path.lower())
65
+ except Exception:
66
+ return False
@@ -1,10 +1,12 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
4
+ from notionary.utils.decorators import time_execution_sync
4
5
 
5
6
 
6
7
  class WhitespacePreProcessor(PreProcessor):
7
8
  @override
9
+ @time_execution_sync()
8
10
  def process(self, markdown_text: str) -> str:
9
11
  if not markdown_text:
10
12
  return ""
@@ -12,23 +14,34 @@ class WhitespacePreProcessor(PreProcessor):
12
14
  lines = markdown_text.split("\n")
13
15
  processed_lines = []
14
16
  code_block_lines = []
17
+ non_code_lines = []
15
18
  in_code_block = False
16
19
 
17
20
  for line in lines:
18
21
  if self._is_code_fence(line):
19
22
  if in_code_block:
23
+ # Format and add code block
20
24
  processed_lines.extend(self._format_code_block(code_block_lines))
21
25
  processed_lines.append("```")
22
26
  code_block_lines = []
23
27
  in_code_block = False
24
28
  else:
29
+ # Format accumulated non-code lines before starting code block
30
+ if non_code_lines:
31
+ processed_lines.extend(self._format_code_block(non_code_lines))
32
+ non_code_lines = []
33
+
25
34
  language = self._extract_language(line)
26
35
  processed_lines.append(f"```{language}")
27
36
  in_code_block = True
28
37
  elif in_code_block:
29
38
  code_block_lines.append(line)
30
39
  else:
31
- processed_lines.append(line.lstrip())
40
+ non_code_lines.append(line)
41
+
42
+ # Format remaining non-code lines at the end
43
+ if non_code_lines:
44
+ processed_lines.extend(self._format_code_block(non_code_lines))
32
45
 
33
46
  return "\n".join(processed_lines)
34
47
 
@@ -39,12 +52,6 @@ class WhitespacePreProcessor(PreProcessor):
39
52
  return fence_line.lstrip().removeprefix("```").strip()
40
53
 
41
54
  def _format_code_block(self, lines: list[str]) -> list[str]:
42
- """
43
- Format code block by removing common leading whitespace.
44
-
45
- Preserves relative indentation between lines.
46
- Empty lines are preserved as-is.
47
- """
48
55
  if not lines:
49
56
  return []
50
57
 
@@ -33,6 +33,8 @@ class MarkdownToNotionConverter(LoggingMixin):
33
33
  parent_stack: list[ParentBlockContext] = []
34
34
 
35
35
  current_line_index = 0
36
+ previous_line_was_empty = False
37
+
36
38
  while current_line_index < len(lines):
37
39
  line = lines[current_line_index]
38
40
 
@@ -42,9 +44,11 @@ class MarkdownToNotionConverter(LoggingMixin):
42
44
  line_index=current_line_index,
43
45
  result_blocks=result_blocks,
44
46
  parent_stack=parent_stack,
47
+ is_previous_line_empty=previous_line_was_empty,
45
48
  )
46
49
 
47
50
  await self._line_parser.handle(context)
51
+ previous_line_was_empty = self._is_processed_line_empty(line)
48
52
 
49
53
  current_line_index += 1 + context.lines_consumed
50
54
 
@@ -57,6 +61,7 @@ class MarkdownToNotionConverter(LoggingMixin):
57
61
  line_index: int,
58
62
  result_blocks: list[BlockCreatePayload],
59
63
  parent_stack: list[ParentBlockContext],
64
+ is_previous_line_empty: bool = False,
60
65
  ) -> BlockParsingContext:
61
66
  return BlockParsingContext(
62
67
  line=line,
@@ -66,4 +71,8 @@ class MarkdownToNotionConverter(LoggingMixin):
66
71
  all_lines=lines,
67
72
  current_line_index=line_index,
68
73
  lines_consumed=0,
74
+ is_previous_line_empty=is_previous_line_empty,
69
75
  )
76
+
77
+ def _is_processed_line_empty(self, line: str) -> bool:
78
+ return line.strip() == ""
@@ -1,6 +1,7 @@
1
1
  from collections.abc import Awaitable, Callable
2
2
 
3
3
  from notionary.blocks.schemas import Block
4
+ from notionary.page.content.syntax.grammar import MarkdownGrammar
4
5
 
5
6
  ConvertChildrenCallback = Callable[[list[Block], int], Awaitable[str]]
6
7
 
@@ -11,13 +12,15 @@ class MarkdownRenderingContext:
11
12
  block: Block,
12
13
  indent_level: int,
13
14
  convert_children_callback: ConvertChildrenCallback | None = None,
15
+ markdown_grammar: MarkdownGrammar | None = None,
14
16
  ) -> None:
15
17
  self.block = block
16
18
  self.indent_level = indent_level
17
19
  self.convert_children_callback = convert_children_callback
20
+ markdown_grammar = markdown_grammar or MarkdownGrammar()
21
+ self._spaces_per_nesting_level = markdown_grammar.spaces_per_nesting_level
18
22
 
19
23
  self.markdown_result: str | None = None
20
- self._markdown_indentation_multiplier = 4
21
24
 
22
25
  async def render_children(self) -> str:
23
26
  return await self._convert_children_to_markdown(self.indent_level)
@@ -43,6 +46,6 @@ class MarkdownRenderingContext:
43
46
  if not text:
44
47
  return text
45
48
 
46
- spaces = " " * self._markdown_indentation_multiplier * self.indent_level
49
+ spaces = " " * self._spaces_per_nesting_level * self.indent_level
47
50
  lines = text.split("\n")
48
51
  return "\n".join(f"{spaces}{line}" if line.strip() else line for line in lines)
@@ -26,11 +26,10 @@ from notionary.page.content.renderer.renderers import (
26
26
  TableRenderer,
27
27
  TableRowHandler,
28
28
  TodoRenderer,
29
- ToggleableHeadingRenderer,
30
29
  ToggleRenderer,
31
30
  VideoRenderer,
32
31
  )
33
- from notionary.page.content.syntax.service import SyntaxRegistry
32
+ from notionary.page.content.syntax import SyntaxRegistry
34
33
 
35
34
 
36
35
  class RendererChainFactory:
@@ -45,7 +44,6 @@ class RendererChainFactory:
45
44
  def create(self) -> BlockRenderer:
46
45
  # Strukturelle Blocks
47
46
  toggle_handler = self._create_toggle_renderer()
48
- toggleable_heading_handler = self._create_toggleable_heading_renderer()
49
47
  heading_handler = self._create_heading_renderer()
50
48
 
51
49
  # Content Blocks
@@ -83,8 +81,7 @@ class RendererChainFactory:
83
81
 
84
82
  # Chain verketten - most specific first, fallback last
85
83
  (
86
- toggle_handler.set_next(toggleable_heading_handler)
87
- .set_next(heading_handler)
84
+ toggle_handler.set_next(heading_handler)
88
85
  .set_next(callout_handler)
89
86
  .set_next(code_handler)
90
87
  .set_next(quote_handler)
@@ -119,12 +116,6 @@ class RendererChainFactory:
119
116
  rich_text_markdown_converter=self._rich_text_markdown_converter,
120
117
  )
121
118
 
122
- def _create_toggleable_heading_renderer(self) -> ToggleableHeadingRenderer:
123
- return ToggleableHeadingRenderer(
124
- syntax_registry=self._syntax_registry,
125
- rich_text_markdown_converter=self._rich_text_markdown_converter,
126
- )
127
-
128
119
  def _create_heading_renderer(self) -> HeadingRenderer:
129
120
  return HeadingRenderer(
130
121
  syntax_registry=self._syntax_registry,
@@ -1,5 +1,5 @@
1
- from .numbered_list_placeholdere import NumberedListPlaceholderReplaceerPostProcessor
1
+ from .numbered_list import NumberedListPlaceholderReplacerPostProcessor
2
2
 
3
3
  __all__ = [
4
- "NumberedListPlaceholderReplaceerPostProcessor",
4
+ "NumberedListPlaceholderReplacerPostProcessor",
5
5
  ]