notionary 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. notionary/blocks/_bootstrap.py +9 -1
  2. notionary/blocks/audio/audio_element.py +53 -28
  3. notionary/blocks/audio/audio_markdown_node.py +10 -4
  4. notionary/blocks/base_block_element.py +15 -3
  5. notionary/blocks/bookmark/bookmark_element.py +39 -36
  6. notionary/blocks/bookmark/bookmark_markdown_node.py +16 -17
  7. notionary/blocks/breadcrumbs/breadcrumb_element.py +2 -2
  8. notionary/blocks/bulleted_list/bulleted_list_element.py +21 -4
  9. notionary/blocks/callout/callout_element.py +20 -4
  10. notionary/blocks/child_database/__init__.py +11 -4
  11. notionary/blocks/child_database/child_database_element.py +59 -0
  12. notionary/blocks/child_database/child_database_models.py +7 -14
  13. notionary/blocks/child_page/child_page_element.py +94 -0
  14. notionary/blocks/client.py +0 -1
  15. notionary/blocks/code/code_element.py +51 -2
  16. notionary/blocks/code/code_markdown_node.py +52 -1
  17. notionary/blocks/column/column_element.py +9 -3
  18. notionary/blocks/column/column_list_element.py +18 -3
  19. notionary/blocks/divider/divider_element.py +3 -11
  20. notionary/blocks/embed/embed_element.py +27 -6
  21. notionary/blocks/equation/equation_element.py +94 -41
  22. notionary/blocks/equation/equation_element_markdown_node.py +8 -9
  23. notionary/blocks/file/file_element.py +56 -37
  24. notionary/blocks/file/file_element_markdown_node.py +9 -7
  25. notionary/blocks/guards.py +22 -0
  26. notionary/blocks/heading/heading_element.py +23 -4
  27. notionary/blocks/image_block/image_element.py +43 -38
  28. notionary/blocks/image_block/image_markdown_node.py +10 -5
  29. notionary/blocks/mixins/captions/__init__.py +4 -0
  30. notionary/blocks/mixins/captions/caption_markdown_node_mixin.py +31 -0
  31. notionary/blocks/mixins/captions/caption_mixin.py +92 -0
  32. notionary/blocks/models.py +3 -1
  33. notionary/blocks/numbered_list/numbered_list_element.py +21 -4
  34. notionary/blocks/paragraph/paragraph_element.py +21 -5
  35. notionary/blocks/pdf/pdf_element.py +47 -41
  36. notionary/blocks/pdf/pdf_markdown_node.py +9 -7
  37. notionary/blocks/quote/quote_element.py +26 -9
  38. notionary/blocks/quote/quote_markdown_node.py +2 -2
  39. notionary/blocks/registry/block_registry.py +1 -46
  40. notionary/blocks/registry/block_registry_builder.py +8 -0
  41. notionary/blocks/rich_text/rich_text_models.py +62 -29
  42. notionary/blocks/rich_text/text_inline_formatter.py +432 -101
  43. notionary/blocks/syntax_prompt_builder.py +137 -0
  44. notionary/blocks/table/table_element.py +110 -9
  45. notionary/blocks/table_of_contents/table_of_contents_element.py +19 -2
  46. notionary/blocks/todo/todo_element.py +21 -4
  47. notionary/blocks/toggle/toggle_element.py +19 -3
  48. notionary/blocks/toggle/toggle_markdown_node.py +1 -1
  49. notionary/blocks/toggleable_heading/toggleable_heading_element.py +19 -4
  50. notionary/blocks/types.py +69 -0
  51. notionary/blocks/video/video_element.py +44 -39
  52. notionary/blocks/video/video_markdown_node.py +10 -5
  53. notionary/comments/__init__.py +26 -0
  54. notionary/comments/client.py +211 -0
  55. notionary/comments/models.py +129 -0
  56. notionary/database/client.py +23 -0
  57. notionary/file_upload/models.py +2 -2
  58. notionary/markdown/markdown_builder.py +34 -27
  59. notionary/page/client.py +21 -6
  60. notionary/page/notion_page.py +77 -2
  61. notionary/page/page_content_deleting_service.py +117 -0
  62. notionary/page/page_content_writer.py +89 -113
  63. notionary/page/page_context.py +64 -0
  64. notionary/page/reader/handler/__init__.py +2 -0
  65. notionary/page/reader/handler/base_block_renderer.py +4 -4
  66. notionary/page/reader/handler/block_rendering_context.py +5 -0
  67. notionary/page/reader/handler/line_renderer.py +16 -3
  68. notionary/page/reader/handler/numbered_list_renderer.py +85 -0
  69. notionary/page/reader/page_content_retriever.py +17 -5
  70. notionary/page/writer/handler/__init__.py +2 -0
  71. notionary/page/writer/handler/code_handler.py +12 -40
  72. notionary/page/writer/handler/column_handler.py +12 -12
  73. notionary/page/writer/handler/column_list_handler.py +13 -13
  74. notionary/page/writer/handler/equation_handler.py +74 -0
  75. notionary/page/writer/handler/line_handler.py +4 -4
  76. notionary/page/writer/handler/regular_line_handler.py +31 -37
  77. notionary/page/writer/handler/table_handler.py +8 -72
  78. notionary/page/writer/handler/toggle_handler.py +14 -12
  79. notionary/page/writer/handler/toggleable_heading_handler.py +22 -16
  80. notionary/page/writer/markdown_to_notion_converter.py +28 -9
  81. notionary/page/writer/markdown_to_notion_converter_context.py +30 -0
  82. notionary/page/writer/markdown_to_notion_formatting_post_processor.py +73 -0
  83. notionary/page/writer/markdown_to_notion_post_processor.py +0 -0
  84. notionary/page/writer/markdown_to_notion_text_length_post_processor.py +0 -0
  85. notionary/page/writer/notion_text_length_processor.py +150 -0
  86. notionary/shared/__init__.py +5 -0
  87. notionary/shared/name_to_id_resolver.py +203 -0
  88. notionary/telemetry/service.py +0 -1
  89. notionary/user/notion_user_manager.py +22 -95
  90. notionary/util/concurrency_limiter.py +0 -0
  91. notionary/workspace.py +4 -4
  92. notionary-0.2.23.dist-info/METADATA +235 -0
  93. {notionary-0.2.21.dist-info → notionary-0.2.23.dist-info}/RECORD +96 -77
  94. notionary/page/markdown_whitespace_processor.py +0 -80
  95. notionary/page/notion_text_length_utils.py +0 -119
  96. notionary/user/notion_user_provider.py +0 -1
  97. notionary-0.2.21.dist-info/METADATA +0 -229
  98. /notionary/page/reader/handler/{context.py → equation_renderer.py} +0 -0
  99. {notionary-0.2.21.dist-info → notionary-0.2.23.dist-info}/LICENSE +0 -0
  100. {notionary-0.2.21.dist-info → notionary-0.2.23.dist-info}/WHEEL +0 -0
@@ -1,9 +1,6 @@
1
1
  import re
2
2
 
3
- from notionary.blocks.rich_text.rich_text_models import RichTextObject
4
- from notionary.blocks.rich_text.text_inline_formatter import TextInlineFormatter
5
3
  from notionary.blocks.table.table_element import TableElement
6
- from notionary.blocks.table.table_models import CreateTableRowBlock, TableRowBlock
7
4
  from notionary.page.writer.handler import LineHandler, LineProcessingContext
8
5
 
9
6
 
@@ -20,11 +17,11 @@ class TableHandler(LineHandler):
20
17
  return False
21
18
  return self._is_table_start(context)
22
19
 
23
- def _process(self, context: LineProcessingContext) -> None:
20
+ async def _process(self, context: LineProcessingContext) -> None:
24
21
  if not self._is_table_start(context):
25
22
  return
26
23
 
27
- self._process_complete_table(context)
24
+ await self._process_complete_table(context)
28
25
  context.was_processed = True
29
26
  context.should_continue = True
30
27
 
@@ -36,16 +33,8 @@ class TableHandler(LineHandler):
36
33
  """Check if this line starts a table."""
37
34
  return self._table_row_pattern.match(context.line.strip()) is not None
38
35
 
39
- def _process_complete_table(self, context: LineProcessingContext) -> None:
40
- """Process the entire table in one go."""
41
- # Create table element
42
- table_element = TableElement()
43
- result = table_element.markdown_to_notion(context.line)
44
- if not result:
45
- return
46
-
47
- block = result if not isinstance(result, list) else result[0]
48
-
36
+ async def _process_complete_table(self, context: LineProcessingContext) -> None:
37
+ """Process the entire table in one go using TableElement."""
49
38
  # Collect all table lines (including the current one)
50
39
  table_lines = [context.line]
51
40
  remaining_lines = context.get_remaining_lines()
@@ -68,63 +57,10 @@ class TableHandler(LineHandler):
68
57
  lines_to_consume = i
69
58
  break
70
59
  else:
71
- # Consumed all remaining lines
72
60
  lines_to_consume = len(remaining_lines)
73
61
 
74
- # Process the table content
75
- table_rows, separator_found = self._process_table_lines(table_lines)
76
-
77
- table = block.table
78
- table.children = table_rows
79
- table.has_column_header = bool(separator_found)
80
-
81
- # Tell the main loop to skip the consumed lines
82
- context.lines_consumed = lines_to_consume
83
- context.result_blocks.append(block)
84
-
85
- def _process_table_lines(
86
- self, table_lines: list[str]
87
- ) -> tuple[list[CreateTableRowBlock], bool]:
88
- """Process all table lines and return rows and separator status."""
89
- table_rows = []
90
- separator_found = False
91
-
92
- for line in table_lines:
93
- line = line.strip()
94
- if not line:
95
- continue
96
-
97
- if self._is_separator_line(line):
98
- separator_found = True
99
- continue
100
-
101
- if self._table_row_pattern.match(line):
102
- table_row = self._create_table_row_from_line(line)
103
- table_rows.append(table_row)
104
-
105
- return table_rows, separator_found
106
-
107
- def _is_separator_line(self, line: str) -> bool:
108
- return self._separator_pattern.match(line) is not None
109
-
110
- def _create_table_row_from_line(self, line: str) -> CreateTableRowBlock:
111
- cells = self._parse_table_row(line)
112
- rich_text_cells = [self._convert_cell_to_rich_text(cell) for cell in cells]
113
- table_row = TableRowBlock(cells=rich_text_cells)
114
- return CreateTableRowBlock(table_row=table_row)
115
-
116
- def _convert_cell_to_rich_text(self, cell: str) -> list[RichTextObject]:
117
- rich_text = TextInlineFormatter.parse_inline_formatting(cell)
118
- if not rich_text:
119
- rich_text = [RichTextObject.from_plain_text(cell)]
120
- return rich_text
121
-
122
- def _parse_table_row(self, row_text: str) -> list[str]:
123
- row_content = row_text.strip()
124
-
125
- if row_content.startswith("|"):
126
- row_content = row_content[1:]
127
- if row_content.endswith("|"):
128
- row_content = row_content[:-1]
62
+ block = await TableElement.create_from_markdown_table(table_lines)
129
63
 
130
- return [cell.strip() for cell in row_content.split("|")]
64
+ if block:
65
+ context.lines_consumed = lines_to_consume
66
+ context.result_blocks.append(block)
@@ -25,15 +25,15 @@ class ToggleHandler(LineHandler):
25
25
  or self._is_toggle_content(context)
26
26
  )
27
27
 
28
- def _process(self, context: LineProcessingContext) -> None:
28
+ async def _process(self, context: LineProcessingContext) -> None:
29
29
  # Explicit, readable branches (small duplication is acceptable)
30
30
  if self._is_toggle_start(context):
31
- self._start_toggle(context)
31
+ await self._start_toggle(context)
32
32
  context.was_processed = True
33
33
  context.should_continue = True
34
34
 
35
35
  if self._is_toggle_end(context):
36
- self._finalize_toggle(context)
36
+ await self._finalize_toggle(context)
37
37
  context.was_processed = True
38
38
  context.should_continue = True
39
39
 
@@ -69,16 +69,16 @@ class ToggleHandler(LineHandler):
69
69
  current_parent = context.parent_stack[-1]
70
70
  return issubclass(current_parent.element_type, ToggleElement)
71
71
 
72
- def _start_toggle(self, context: LineProcessingContext) -> None:
72
+ async def _start_toggle(self, context: LineProcessingContext) -> None:
73
73
  """Start a new toggle block."""
74
74
  toggle_element = ToggleElement()
75
75
 
76
76
  # Create the block
77
- result = toggle_element.markdown_to_notion(context.line)
77
+ result = await toggle_element.markdown_to_notion(context.line)
78
78
  if not result:
79
79
  return
80
80
 
81
- block = result if not isinstance(result, list) else result[0]
81
+ block = result
82
82
 
83
83
  # Push to parent stack
84
84
  parent_context = ParentBlockContext(
@@ -88,12 +88,12 @@ class ToggleHandler(LineHandler):
88
88
  )
89
89
  context.parent_stack.append(parent_context)
90
90
 
91
- def _finalize_toggle(self, context: LineProcessingContext) -> None:
91
+ async def _finalize_toggle(self, context: LineProcessingContext) -> None:
92
92
  """Finalize a toggle block and add it to result_blocks."""
93
93
  toggle_context = context.parent_stack.pop()
94
94
 
95
95
  if toggle_context.has_children():
96
- all_children = self._get_all_children(
96
+ all_children = await self._get_all_children(
97
97
  toggle_context, context.block_registry
98
98
  )
99
99
  toggle_context.block.toggle.children = all_children
@@ -124,7 +124,7 @@ class ToggleHandler(LineHandler):
124
124
  """Add content to the current toggle context."""
125
125
  context.parent_stack[-1].add_child_line(context.line)
126
126
 
127
- def _convert_children_text(self, text: str, block_registry) -> list:
127
+ async def _convert_children_text(self, text: str, block_registry) -> list:
128
128
  """Convert children text to blocks."""
129
129
  from notionary.page.writer.markdown_to_notion_converter import (
130
130
  MarkdownToNotionConverter,
@@ -134,16 +134,18 @@ class ToggleHandler(LineHandler):
134
134
  return []
135
135
 
136
136
  child_converter = MarkdownToNotionConverter(block_registry)
137
- return child_converter._process_lines(text)
137
+ return await child_converter.process_lines(text)
138
138
 
139
- def _get_all_children(self, parent_context, block_registry) -> list:
139
+ async def _get_all_children(self, parent_context, block_registry) -> list:
140
140
  """Helper method to combine text-based and direct block children."""
141
141
  children_blocks = []
142
142
 
143
143
  # Process text lines
144
144
  if parent_context.child_lines:
145
145
  children_text = "\n".join(parent_context.child_lines)
146
- text_blocks = self._convert_children_text(children_text, block_registry)
146
+ text_blocks = await self._convert_children_text(
147
+ children_text, block_registry
148
+ )
147
149
  children_blocks.extend(text_blocks)
148
150
 
149
151
  # Add direct blocks (like processed columns)
@@ -32,21 +32,21 @@ class ToggleableHeadingHandler(LineHandler):
32
32
  or self._is_toggleable_heading_content(context)
33
33
  )
34
34
 
35
- def _process(self, context: LineProcessingContext) -> None:
35
+ async def _process(self, context: LineProcessingContext) -> None:
36
36
  """Process toggleable heading start, end, or content with unified handling."""
37
37
 
38
- def _handle(action):
39
- action(context)
38
+ async def _handle(action):
39
+ await action(context)
40
40
  context.was_processed = True
41
41
  context.should_continue = True
42
42
  return True
43
43
 
44
44
  if self._is_toggleable_heading_start(context):
45
- return _handle(self._start_toggleable_heading)
45
+ return await _handle(self._start_toggleable_heading)
46
46
  if self._is_toggleable_heading_end(context):
47
- return _handle(self._finalize_toggleable_heading)
47
+ return await _handle(self._finalize_toggleable_heading)
48
48
  if self._is_toggleable_heading_content(context):
49
- return _handle(self._add_toggleable_heading_content)
49
+ return await _handle(self._add_toggleable_heading_content)
50
50
 
51
51
  def _is_toggleable_heading_start(self, context: LineProcessingContext) -> bool:
52
52
  """Check if line starts a toggleable heading (+++# "Title")."""
@@ -64,16 +64,16 @@ class ToggleableHeadingHandler(LineHandler):
64
64
  current_parent = context.parent_stack[-1]
65
65
  return issubclass(current_parent.element_type, ToggleableHeadingElement)
66
66
 
67
- def _start_toggleable_heading(self, context: LineProcessingContext) -> None:
67
+ async def _start_toggleable_heading(self, context: LineProcessingContext) -> None:
68
68
  """Start a new toggleable heading block."""
69
69
  toggleable_heading_element = ToggleableHeadingElement()
70
70
 
71
71
  # Create the block
72
- result = toggleable_heading_element.markdown_to_notion(context.line)
72
+ result = await toggleable_heading_element.markdown_to_notion(context.line)
73
73
  if not result:
74
74
  return
75
75
 
76
- block = result if not isinstance(result, list) else result[0]
76
+ block = result
77
77
 
78
78
  # Push to parent stack
79
79
  parent_context = ParentBlockContext(
@@ -96,16 +96,20 @@ class ToggleableHeadingHandler(LineHandler):
96
96
  line = context.line.strip()
97
97
  return not (self._start_pattern.match(line) or self._end_pattern.match(line))
98
98
 
99
- def _add_toggleable_heading_content(self, context: LineProcessingContext) -> None:
99
+ async def _add_toggleable_heading_content(
100
+ self, context: LineProcessingContext
101
+ ) -> None:
100
102
  """Add content to the current toggleable heading context."""
101
103
  context.parent_stack[-1].add_child_line(context.line)
102
104
 
103
- def _finalize_toggleable_heading(self, context: LineProcessingContext) -> None:
105
+ async def _finalize_toggleable_heading(
106
+ self, context: LineProcessingContext
107
+ ) -> None:
104
108
  """Finalize a toggleable heading block and add it to result_blocks."""
105
109
  heading_context = context.parent_stack.pop()
106
110
 
107
111
  if heading_context.has_children():
108
- all_children = self._get_all_children(
112
+ all_children = await self._get_all_children(
109
113
  heading_context, context.block_registry
110
114
  )
111
115
  self._assign_heading_children(heading_context.block, all_children)
@@ -123,7 +127,7 @@ class ToggleableHeadingHandler(LineHandler):
123
127
  # No parent, add to top level
124
128
  context.result_blocks.append(heading_context.block)
125
129
 
126
- def _get_all_children(
130
+ async def _get_all_children(
127
131
  self, parent_context: ParentBlockContext, block_registry
128
132
  ) -> list:
129
133
  """Helper method to combine text-based and direct block children."""
@@ -132,7 +136,9 @@ class ToggleableHeadingHandler(LineHandler):
132
136
  # Process text lines
133
137
  if parent_context.child_lines:
134
138
  children_text = "\n".join(parent_context.child_lines)
135
- text_blocks = self._convert_children_text(children_text, block_registry)
139
+ text_blocks = await self._convert_children_text(
140
+ children_text, block_registry
141
+ )
136
142
  children_blocks.extend(text_blocks)
137
143
 
138
144
  # Add direct blocks
@@ -154,7 +160,7 @@ class ToggleableHeadingHandler(LineHandler):
154
160
  elif block_type == BlockType.HEADING_3:
155
161
  parent_block.heading_3.children = children
156
162
 
157
- def _convert_children_text(self, text: str, block_registry) -> list:
163
+ async def _convert_children_text(self, text: str, block_registry) -> list:
158
164
  """Convert children text to blocks."""
159
165
  from notionary.page.writer.markdown_to_notion_converter import (
160
166
  MarkdownToNotionConverter,
@@ -164,4 +170,4 @@ class ToggleableHeadingHandler(LineHandler):
164
170
  return []
165
171
 
166
172
  child_converter = MarkdownToNotionConverter(block_registry)
167
- return child_converter._process_lines(text)
173
+ return await child_converter.process_lines(text)
@@ -1,10 +1,10 @@
1
1
  from notionary.blocks.models import BlockCreateRequest
2
2
  from notionary.blocks.registry.block_registry import BlockRegistry
3
- from notionary.page.notion_text_length_utils import fix_blocks_content_length
4
3
  from notionary.page.writer.handler import (
5
4
  CodeHandler,
6
5
  ColumnHandler,
7
6
  ColumnListHandler,
7
+ EquationHandler,
8
8
  LineProcessingContext,
9
9
  ParentBlockContext,
10
10
  RegularLineHandler,
@@ -12,6 +12,12 @@ from notionary.page.writer.handler import (
12
12
  ToggleableHeadingHandler,
13
13
  ToggleHandler,
14
14
  )
15
+ from notionary.page.writer.markdown_to_notion_formatting_post_processor import (
16
+ MarkdownToNotionFormattingPostProcessor,
17
+ )
18
+ from notionary.page.writer.notion_text_length_processor import (
19
+ NotionTextLengthProcessor,
20
+ )
15
21
 
16
22
 
17
23
  class MarkdownToNotionConverter:
@@ -19,10 +25,14 @@ class MarkdownToNotionConverter:
19
25
 
20
26
  def __init__(self, block_registry: BlockRegistry) -> None:
21
27
  self._block_registry = block_registry
28
+ self._formatting_post_processor = MarkdownToNotionFormattingPostProcessor()
29
+ self._text_length_post_processor = NotionTextLengthProcessor()
30
+
22
31
  self._setup_handler_chain()
23
32
 
24
33
  def _setup_handler_chain(self) -> None:
25
34
  code_handler = CodeHandler()
35
+ equation_handler = EquationHandler()
26
36
  table_handler = TableHandler()
27
37
  column_list_handler = ColumnListHandler()
28
38
  column_handler = ColumnHandler()
@@ -31,22 +41,31 @@ class MarkdownToNotionConverter:
31
41
  regular_handler = RegularLineHandler()
32
42
 
33
43
  # register more specific elements first
34
- code_handler.set_next(table_handler).set_next(column_list_handler).set_next(
35
- column_handler
36
- ).set_next(toggleable_heading_handler).set_next(toggle_handler).set_next(
44
+ code_handler.set_next(equation_handler).set_next(table_handler).set_next(
45
+ column_list_handler
46
+ ).set_next(column_handler).set_next(toggleable_heading_handler).set_next(
47
+ toggle_handler
48
+ ).set_next(
37
49
  regular_handler
38
50
  )
39
51
 
40
52
  self._handler_chain = code_handler
41
53
 
42
- def convert(self, markdown_text: str) -> list[BlockCreateRequest]:
54
+ async def convert(self, markdown_text: str) -> list[BlockCreateRequest]:
43
55
  if not markdown_text.strip():
44
56
  return []
45
57
 
46
- all_blocks = self._process_lines(markdown_text)
47
- return fix_blocks_content_length(all_blocks)
58
+ all_blocks = await self.process_lines(markdown_text)
59
+
60
+ # Apply formatting post-processing (empty paragraphs)
61
+ all_blocks = self._formatting_post_processor.process(all_blocks)
62
+
63
+ # Apply text length post-processing (truncation)
64
+ all_blocks = self._text_length_post_processor.process(all_blocks)
65
+
66
+ return all_blocks
48
67
 
49
- def _process_lines(self, text: str) -> list[BlockCreateRequest]:
68
+ async def process_lines(self, text: str) -> list[BlockCreateRequest]:
50
69
  lines = text.split("\n")
51
70
  result_blocks: list[BlockCreateRequest] = []
52
71
  parent_stack: list[ParentBlockContext] = []
@@ -65,7 +84,7 @@ class MarkdownToNotionConverter:
65
84
  lines_consumed=0,
66
85
  )
67
86
 
68
- self._handler_chain.handle(context)
87
+ await self._handler_chain.handle(context)
69
88
 
70
89
  # Skip consumed lines
71
90
  i += 1 + context.lines_consumed
@@ -0,0 +1,30 @@
1
+ # notionary/blocks/context/conversion_context.py
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional, TYPE_CHECKING
5
+ from dataclasses import dataclass
6
+
7
+ if TYPE_CHECKING:
8
+ from notionary.database.client import NotionDatabaseClient
9
+
10
+
11
+ @dataclass
12
+ class ConverterContext:
13
+ """
14
+ Context object that provides dependencies for block conversion operations.
15
+ """
16
+
17
+ page_id: Optional[str] = None
18
+ database_client: Optional["NotionDatabaseClient"] = None
19
+
20
+ def require_database_client(self) -> NotionDatabaseClient:
21
+ """Get database client or raise if not available."""
22
+ if self.database_client is None:
23
+ raise ValueError("Database client required but not provided in context")
24
+ return self.database_client
25
+
26
+ def require_page_id(self) -> str:
27
+ """Get parent page ID or raise if not available."""
28
+ if self.page_id is None:
29
+ raise ValueError("Parent page ID required but not provided in context")
30
+ return self.page_id
@@ -0,0 +1,73 @@
1
+ """
2
+ Post-processor for handling block formatting in Markdown to Notion conversion.
3
+
4
+ Handles block formatting tasks like adding empty paragraphs before media blocks
5
+ and other formatting-related post-processing.
6
+ """
7
+
8
+ from typing import cast
9
+
10
+ from notionary.blocks.models import BlockCreateRequest
11
+ from notionary.blocks.types import BlockType
12
+ from notionary.blocks.paragraph.paragraph_models import (
13
+ CreateParagraphBlock,
14
+ ParagraphBlock,
15
+ )
16
+
17
+
18
+ class MarkdownToNotionFormattingPostProcessor:
19
+ """Handles block formatting post-processing for Notion blocks."""
20
+
21
+ BLOCKS_NEEDING_EMPTY_PARAGRAPH: set[BlockType] = {
22
+ BlockType.DIVIDER,
23
+ BlockType.FILE,
24
+ BlockType.IMAGE,
25
+ BlockType.PDF,
26
+ BlockType.VIDEO,
27
+ }
28
+
29
+ def process(self, blocks: list[BlockCreateRequest]) -> list[BlockCreateRequest]:
30
+ """Process blocks with all formatting steps."""
31
+ if not blocks:
32
+ return blocks
33
+
34
+ return self._add_empty_paragraphs_for_media_blocks(blocks)
35
+
36
+ def _add_empty_paragraphs_for_media_blocks(
37
+ self, blocks: list[BlockCreateRequest]
38
+ ) -> list[BlockCreateRequest]:
39
+ """Add empty paragraphs before configured block types."""
40
+ if not blocks:
41
+ return blocks
42
+
43
+ result: list[BlockCreateRequest] = []
44
+
45
+ for i, block in enumerate(blocks):
46
+ block_type = block.type
47
+
48
+ if (
49
+ block_type in self.BLOCKS_NEEDING_EMPTY_PARAGRAPH
50
+ and i > 0
51
+ and not self._is_empty_paragraph(result[-1] if result else None)
52
+ ):
53
+
54
+ # Create empty paragraph block inline
55
+ empty_paragraph = CreateParagraphBlock(
56
+ paragraph=ParagraphBlock(rich_text=[])
57
+ )
58
+ result.append(empty_paragraph)
59
+
60
+ result.append(block)
61
+
62
+ return result
63
+
64
+ def _is_empty_paragraph(self, block: BlockCreateRequest | None) -> bool:
65
+ if not block or block.type != BlockType.PARAGRAPH:
66
+ return False
67
+ if not isinstance(block, CreateParagraphBlock):
68
+ return False
69
+
70
+ para_block = cast(CreateParagraphBlock, block)
71
+ paragraph: ParagraphBlock | None = para_block.paragraph
72
+ if not paragraph:
73
+ return False
@@ -0,0 +1,150 @@
1
+ """
2
+ Post-processor for handling Notion API text length limitations.
3
+
4
+ Handles text length validation and truncation for blocks that exceed
5
+ Notion's rich_text character limit of 2000 characters per element.
6
+ """
7
+
8
+ from typing import TypeGuard, Union
9
+
10
+ from notionary.blocks.models import BlockCreateRequest
11
+ from notionary.blocks.rich_text.rich_text_models import RichTextObject
12
+ from notionary.blocks.types import HasRichText, HasChildren
13
+ from notionary.util import LoggingMixin
14
+
15
+
16
+ class NotionTextLengthProcessor(LoggingMixin):
17
+ """
18
+ Processes Notion blocks to ensure text content doesn't exceed API limits.
19
+
20
+ The Notion API has a limit of 2000 characters per rich_text element.
21
+ This processor truncates content that exceeds the specified limit.
22
+ """
23
+
24
+ DEFAULT_MAX_LENGTH = 1900 # Leave some buffer under the 2000 limit
25
+
26
+ def __init__(self, max_text_length: int = DEFAULT_MAX_LENGTH) -> None:
27
+ """
28
+ Initialize the processor.
29
+
30
+ Args:
31
+ max_text_length: Maximum allowed text length (default: 1900)
32
+ """
33
+ if max_text_length <= 0:
34
+ raise ValueError("max_text_length must be positive")
35
+ if max_text_length > 2000:
36
+ self.logger.warning(
37
+ "max_text_length (%d) exceeds Notion's limit of 2000 characters",
38
+ max_text_length,
39
+ )
40
+
41
+ self.max_text_length = max_text_length
42
+
43
+ def process(self, blocks: list[BlockCreateRequest]) -> list[BlockCreateRequest]:
44
+ """
45
+ Process blocks to fix text length limits.
46
+ """
47
+ if not blocks:
48
+ return blocks
49
+
50
+ flattened_blocks = self._flatten_block_list(blocks)
51
+ return [self._process_single_block(block) for block in flattened_blocks]
52
+
53
+ def _process_single_block(self, block: BlockCreateRequest) -> BlockCreateRequest:
54
+ """
55
+ Process a single block to fix text length issues.
56
+ """
57
+ block_copy = block.model_copy(deep=True)
58
+
59
+ block_content = self._extract_block_content(block_copy)
60
+
61
+ if block_content is not None:
62
+ self._fix_content_text_lengths(block_content)
63
+
64
+ return block_copy
65
+
66
+ def _extract_block_content(self, block: BlockCreateRequest) -> object | None:
67
+ """
68
+ Extract the content object from a block using type-safe attribute access.
69
+ """
70
+ # Get the block's content using the block type as attribute name
71
+ # We assume block.type always exists as per the BlockCreateRequest structure
72
+ content = getattr(block, block.type, None)
73
+
74
+ # Verify it's a valid content object (has rich_text or children)
75
+ if content and (
76
+ self._is_rich_text_container(content)
77
+ or self._is_children_container(content)
78
+ ):
79
+ return content
80
+
81
+ return None
82
+
83
+ def _fix_content_text_lengths(self, content: object) -> None:
84
+ """
85
+ Fix text lengths in a content object and its children recursively.
86
+ """
87
+ # Process rich_text if present
88
+ if self._is_rich_text_container(content):
89
+ self._truncate_rich_text_content(content.rich_text)
90
+
91
+ # Process children recursively if present
92
+ if self._is_children_container(content):
93
+ for child in content.children:
94
+ child_content = self._extract_block_content(child)
95
+ if child_content:
96
+ self._fix_content_text_lengths(child_content)
97
+
98
+ def _truncate_rich_text_content(self, rich_text_list: list[RichTextObject]) -> None:
99
+ """
100
+ Truncate text content in rich text objects that exceed the limit.
101
+ """
102
+ for rich_text_obj in rich_text_list:
103
+ if not self._is_text_rich_text_object(rich_text_obj):
104
+ continue
105
+
106
+ content = rich_text_obj.text.content
107
+ if len(content) > self.max_text_length:
108
+ self.logger.warning(
109
+ "Truncating text content from %d to %d characters",
110
+ len(content),
111
+ self.max_text_length,
112
+ )
113
+ # Truncate the content
114
+ rich_text_obj.text.content = content[: self.max_text_length]
115
+
116
+ def _flatten_block_list(
117
+ self, blocks: list[Union[BlockCreateRequest, list]]
118
+ ) -> list[BlockCreateRequest]:
119
+ """
120
+ Flatten a potentially nested list of blocks.
121
+ """
122
+ flattened: list[BlockCreateRequest] = []
123
+
124
+ for item in blocks:
125
+ if isinstance(item, list):
126
+ # Recursively flatten nested lists
127
+ flattened.extend(self._flatten_block_list(item))
128
+ else:
129
+ # Add individual block
130
+ flattened.append(item)
131
+
132
+ return flattened
133
+
134
+ def _is_rich_text_container(self, obj: object) -> TypeGuard[HasRichText]:
135
+ """Type guard to check if an object has rich_text attribute."""
136
+ return hasattr(obj, "rich_text") and isinstance(getattr(obj, "rich_text"), list)
137
+
138
+ def _is_children_container(self, obj: object) -> TypeGuard[HasChildren]:
139
+ """Type guard to check if an object has children attribute."""
140
+ return hasattr(obj, "children") and isinstance(getattr(obj, "children"), list)
141
+
142
+ def _is_text_rich_text_object(
143
+ self, rich_text_obj: RichTextObject
144
+ ) -> TypeGuard[RichTextObject]:
145
+ """Type guard to check if a RichTextObject is of type 'text' with content."""
146
+ return (
147
+ rich_text_obj.type == "text"
148
+ and rich_text_obj.text is not None
149
+ and rich_text_obj.text.content is not None
150
+ )
@@ -0,0 +1,5 @@
1
+ from .name_to_id_resolver import NameIdResolver
2
+
3
+ __all__ = [
4
+ "NameIdResolver"
5
+ ]