notionary-0.4.0-py3-none-any.whl → notionary-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. notionary/__init__.py +44 -1
  2. notionary/blocks/client.py +37 -11
  3. notionary/blocks/rich_text/markdown_rich_text_converter.py +49 -15
  4. notionary/blocks/rich_text/models.py +13 -4
  5. notionary/blocks/rich_text/name_id_resolver/data_source.py +9 -3
  6. notionary/blocks/rich_text/name_id_resolver/person.py +6 -2
  7. notionary/blocks/rich_text/rich_text_markdown_converter.py +10 -3
  8. notionary/blocks/schemas.py +2 -1
  9. notionary/comments/client.py +19 -6
  10. notionary/comments/factory.py +10 -3
  11. notionary/comments/schemas.py +9 -3
  12. notionary/comments/service.py +12 -4
  13. notionary/data_source/http/data_source_instance_client.py +59 -17
  14. notionary/data_source/properties/schemas.py +30 -10
  15. notionary/data_source/query/builder.py +67 -18
  16. notionary/data_source/query/resolver.py +16 -5
  17. notionary/data_source/query/schema.py +24 -6
  18. notionary/data_source/query/validator.py +18 -6
  19. notionary/data_source/schema/registry.py +31 -12
  20. notionary/data_source/schema/service.py +66 -20
  21. notionary/data_source/service.py +74 -23
  22. notionary/database/client.py +27 -9
  23. notionary/database/database_metadata_update_client.py +12 -4
  24. notionary/database/service.py +11 -4
  25. notionary/exceptions/__init__.py +15 -3
  26. notionary/exceptions/block_parsing.py +6 -2
  27. notionary/exceptions/data_source/builder.py +11 -5
  28. notionary/exceptions/data_source/properties.py +3 -1
  29. notionary/exceptions/file_upload.py +12 -3
  30. notionary/exceptions/properties.py +3 -1
  31. notionary/exceptions/search.py +6 -2
  32. notionary/file_upload/client.py +5 -1
  33. notionary/file_upload/config/config.py +10 -3
  34. notionary/file_upload/query/builder.py +6 -2
  35. notionary/file_upload/schemas.py +3 -1
  36. notionary/file_upload/service.py +42 -14
  37. notionary/file_upload/validation/factory.py +3 -1
  38. notionary/file_upload/validation/impl/file_name_length.py +3 -1
  39. notionary/file_upload/validation/models.py +15 -5
  40. notionary/file_upload/validation/validators/file_extension.py +12 -3
  41. notionary/http/client.py +27 -8
  42. notionary/page/content/__init__.py +9 -0
  43. notionary/page/content/factory.py +21 -7
  44. notionary/page/content/markdown/builder.py +85 -23
  45. notionary/page/content/markdown/nodes/audio.py +8 -4
  46. notionary/page/content/markdown/nodes/base.py +3 -3
  47. notionary/page/content/markdown/nodes/bookmark.py +5 -3
  48. notionary/page/content/markdown/nodes/breadcrumb.py +2 -2
  49. notionary/page/content/markdown/nodes/bulleted_list.py +5 -3
  50. notionary/page/content/markdown/nodes/callout.py +2 -2
  51. notionary/page/content/markdown/nodes/code.py +5 -3
  52. notionary/page/content/markdown/nodes/columns.py +3 -3
  53. notionary/page/content/markdown/nodes/container.py +9 -5
  54. notionary/page/content/markdown/nodes/divider.py +2 -2
  55. notionary/page/content/markdown/nodes/embed.py +8 -4
  56. notionary/page/content/markdown/nodes/equation.py +4 -2
  57. notionary/page/content/markdown/nodes/file.py +8 -4
  58. notionary/page/content/markdown/nodes/heading.py +2 -2
  59. notionary/page/content/markdown/nodes/image.py +8 -4
  60. notionary/page/content/markdown/nodes/mixins/caption.py +5 -3
  61. notionary/page/content/markdown/nodes/numbered_list.py +5 -3
  62. notionary/page/content/markdown/nodes/paragraph.py +4 -2
  63. notionary/page/content/markdown/nodes/pdf.py +8 -4
  64. notionary/page/content/markdown/nodes/quote.py +2 -2
  65. notionary/page/content/markdown/nodes/space.py +2 -2
  66. notionary/page/content/markdown/nodes/table.py +8 -5
  67. notionary/page/content/markdown/nodes/table_of_contents.py +2 -2
  68. notionary/page/content/markdown/nodes/todo.py +15 -7
  69. notionary/page/content/markdown/nodes/toggle.py +2 -2
  70. notionary/page/content/markdown/nodes/video.py +8 -4
  71. notionary/page/content/markdown/structured_output/__init__.py +73 -0
  72. notionary/page/content/markdown/structured_output/models.py +391 -0
  73. notionary/page/content/markdown/structured_output/service.py +211 -0
  74. notionary/page/content/parser/context.py +1 -1
  75. notionary/page/content/parser/factory.py +23 -8
  76. notionary/page/content/parser/parsers/audio.py +7 -2
  77. notionary/page/content/parser/parsers/base.py +2 -2
  78. notionary/page/content/parser/parsers/bookmark.py +2 -2
  79. notionary/page/content/parser/parsers/breadcrumb.py +2 -2
  80. notionary/page/content/parser/parsers/bulleted_list.py +19 -6
  81. notionary/page/content/parser/parsers/callout.py +15 -5
  82. notionary/page/content/parser/parsers/caption.py +9 -3
  83. notionary/page/content/parser/parsers/code.py +21 -7
  84. notionary/page/content/parser/parsers/column.py +8 -4
  85. notionary/page/content/parser/parsers/column_list.py +19 -7
  86. notionary/page/content/parser/parsers/divider.py +2 -2
  87. notionary/page/content/parser/parsers/embed.py +2 -2
  88. notionary/page/content/parser/parsers/equation.py +8 -4
  89. notionary/page/content/parser/parsers/file.py +7 -2
  90. notionary/page/content/parser/parsers/file_like_block.py +30 -10
  91. notionary/page/content/parser/parsers/heading.py +31 -10
  92. notionary/page/content/parser/parsers/image.py +7 -2
  93. notionary/page/content/parser/parsers/numbered_list.py +18 -6
  94. notionary/page/content/parser/parsers/paragraph.py +3 -1
  95. notionary/page/content/parser/parsers/pdf.py +7 -2
  96. notionary/page/content/parser/parsers/quote.py +28 -9
  97. notionary/page/content/parser/parsers/space.py +2 -2
  98. notionary/page/content/parser/parsers/table.py +31 -10
  99. notionary/page/content/parser/parsers/table_of_contents.py +7 -3
  100. notionary/page/content/parser/parsers/todo.py +15 -5
  101. notionary/page/content/parser/parsers/toggle.py +15 -5
  102. notionary/page/content/parser/parsers/video.py +7 -2
  103. notionary/page/content/parser/post_processing/handlers/rich_text_length.py +8 -2
  104. notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +8 -2
  105. notionary/page/content/parser/post_processing/service.py +3 -1
  106. notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +21 -7
  107. notionary/page/content/parser/pre_processsing/handlers/indentation.py +11 -4
  108. notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +13 -6
  109. notionary/page/content/parser/service.py +4 -1
  110. notionary/page/content/renderer/context.py +15 -5
  111. notionary/page/content/renderer/factory.py +12 -6
  112. notionary/page/content/renderer/post_processing/handlers/numbered_list.py +19 -9
  113. notionary/page/content/renderer/renderers/audio.py +14 -5
  114. notionary/page/content/renderer/renderers/base.py +3 -3
  115. notionary/page/content/renderer/renderers/bookmark.py +3 -1
  116. notionary/page/content/renderer/renderers/bulleted_list.py +11 -5
  117. notionary/page/content/renderer/renderers/callout.py +19 -7
  118. notionary/page/content/renderer/renderers/captioned_block.py +11 -5
  119. notionary/page/content/renderer/renderers/code.py +6 -2
  120. notionary/page/content/renderer/renderers/column.py +3 -1
  121. notionary/page/content/renderer/renderers/column_list.py +3 -1
  122. notionary/page/content/renderer/renderers/embed.py +3 -1
  123. notionary/page/content/renderer/renderers/equation.py +3 -1
  124. notionary/page/content/renderer/renderers/file.py +14 -5
  125. notionary/page/content/renderer/renderers/file_like_block.py +8 -4
  126. notionary/page/content/renderer/renderers/heading.py +22 -8
  127. notionary/page/content/renderer/renderers/image.py +13 -4
  128. notionary/page/content/renderer/renderers/numbered_list.py +8 -3
  129. notionary/page/content/renderer/renderers/paragraph.py +12 -4
  130. notionary/page/content/renderer/renderers/pdf.py +14 -5
  131. notionary/page/content/renderer/renderers/quote.py +14 -6
  132. notionary/page/content/renderer/renderers/table.py +15 -5
  133. notionary/page/content/renderer/renderers/todo.py +16 -6
  134. notionary/page/content/renderer/renderers/toggle.py +8 -4
  135. notionary/page/content/renderer/renderers/video.py +14 -5
  136. notionary/page/content/renderer/service.py +9 -3
  137. notionary/page/content/service.py +21 -7
  138. notionary/page/content/syntax/definition/__init__.py +11 -0
  139. notionary/page/content/syntax/definition/models.py +57 -0
  140. notionary/page/content/syntax/definition/registry.py +371 -0
  141. notionary/page/content/syntax/prompts/__init__.py +4 -0
  142. notionary/page/content/syntax/prompts/models.py +11 -0
  143. notionary/page/content/syntax/prompts/registry.py +703 -0
  144. notionary/page/page_metadata_update_client.py +12 -4
  145. notionary/page/properties/client.py +45 -15
  146. notionary/page/properties/factory.py +6 -2
  147. notionary/page/properties/service.py +110 -36
  148. notionary/page/service.py +20 -6
  149. notionary/shared/entity/client.py +6 -2
  150. notionary/shared/entity/dto_parsers.py +3 -1
  151. notionary/shared/entity/entity_metadata_update_client.py +9 -3
  152. notionary/shared/entity/schemas.py +1 -1
  153. notionary/shared/entity/service.py +53 -22
  154. notionary/shared/models/file.py +3 -1
  155. notionary/shared/models/icon.py +6 -4
  156. notionary/user/base.py +6 -2
  157. notionary/user/bot.py +10 -2
  158. notionary/user/client.py +3 -1
  159. notionary/user/person.py +3 -1
  160. notionary/user/schemas.py +3 -1
  161. notionary/user/service.py +6 -2
  162. notionary/utils/decorators.py +6 -2
  163. notionary/utils/fuzzy.py +6 -2
  164. notionary/utils/mixins/logging.py +3 -1
  165. notionary/utils/pagination.py +14 -4
  166. notionary/workspace/__init__.py +5 -1
  167. notionary/workspace/query/service.py +59 -16
  168. notionary/workspace/service.py +39 -11
  169. {notionary-0.4.0.dist-info → notionary-0.4.2.dist-info}/METADATA +1 -1
  170. notionary-0.4.2.dist-info/RECORD +236 -0
  171. notionary/page/blocks/client.py +0 -1
  172. notionary/page/content/syntax/__init__.py +0 -5
  173. notionary/page/content/syntax/models.py +0 -66
  174. notionary/page/content/syntax/registry.py +0 -371
  175. notionary-0.4.0.dist-info/RECORD +0 -230
  176. /notionary/page/content/syntax/{grammar.py → definition/grammar.py} +0 -0
  177. {notionary-0.4.0.dist-info → notionary-0.4.2.dist-info}/WHEEL +0 -0
  178. {notionary-0.4.0.dist-info → notionary-0.4.2.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,7 @@ from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
4
  from notionary.page.content.markdown.nodes.container import ContainerNode
5
- from notionary.page.content.syntax import SyntaxRegistry
5
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
6
6
 
7
7
 
8
8
  class QuoteMarkdownNode(ContainerNode):
@@ -10,7 +10,7 @@ class QuoteMarkdownNode(ContainerNode):
10
10
  self,
11
11
  text: str,
12
12
  children: list[MarkdownNode] | None = None,
13
- syntax_registry: SyntaxRegistry | None = None,
13
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
14
14
  ) -> None:
15
15
  super().__init__(syntax_registry=syntax_registry)
16
16
  self.text = text
@@ -1,11 +1,11 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
- from notionary.page.content.syntax import SyntaxRegistry
4
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
5
5
 
6
6
 
7
7
  class SpaceMarkdownNode(MarkdownNode):
8
- def __init__(self, syntax_registry: SyntaxRegistry | None = None) -> None:
8
+ def __init__(self, syntax_registry: SyntaxDefinitionRegistry | None = None) -> None:
9
9
  super().__init__(syntax_registry=syntax_registry)
10
10
 
11
11
  @override
@@ -1,12 +1,15 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
- from notionary.page.content.syntax import SyntaxRegistry
4
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
5
5
 
6
6
 
7
7
  class TableMarkdownNode(MarkdownNode):
8
8
  def __init__(
9
- self, headers: list[str], rows: list[list[str]], syntax_registry: SyntaxRegistry | None = None
9
+ self,
10
+ headers: list[str],
11
+ rows: list[list[str]],
12
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
10
13
  ) -> None:
11
14
  super().__init__(syntax_registry=syntax_registry)
12
15
  self._validate_input(headers, rows)
@@ -36,10 +39,10 @@ class TableMarkdownNode(MarkdownNode):
36
39
  return f"{delimiter} {joined_cells} {delimiter}"
37
40
 
38
41
  def _build_separator_row(self) -> str:
39
- table_syntax = self._syntax_registry.get_table_syntax()
40
42
  col_count = len(self.headers)
41
- separators = [table_syntax.end_delimiter] * col_count
42
- return self._format_row(separators)
43
+ separators = ["-"] * col_count
44
+ row = self._format_row(separators)
45
+ return row
43
46
 
44
47
  def _build_data_rows(self) -> list[str]:
45
48
  return [self._format_row(row) for row in self.rows]
@@ -1,11 +1,11 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
- from notionary.page.content.syntax import SyntaxRegistry
4
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
5
5
 
6
6
 
7
7
  class TableOfContentsMarkdownNode(MarkdownNode):
8
- def __init__(self, syntax_registry: SyntaxRegistry | None = None) -> None:
8
+ def __init__(self, syntax_registry: SyntaxDefinitionRegistry | None = None) -> None:
9
9
  super().__init__(syntax_registry=syntax_registry)
10
10
 
11
11
  @override
@@ -2,7 +2,7 @@ from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
4
  from notionary.page.content.markdown.nodes.container import ContainerNode
5
- from notionary.page.content.syntax import SyntaxRegistry
5
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
6
6
 
7
7
 
8
8
  class TodoMarkdownNode(ContainerNode):
@@ -14,7 +14,7 @@ class TodoMarkdownNode(ContainerNode):
14
14
  checked: bool = False,
15
15
  marker: str = "-",
16
16
  children: list[MarkdownNode] | None = None,
17
- syntax_registry: SyntaxRegistry | None = None,
17
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
18
18
  ):
19
19
  super().__init__(syntax_registry=syntax_registry)
20
20
  self.text = text
@@ -24,9 +24,13 @@ class TodoMarkdownNode(ContainerNode):
24
24
 
25
25
  @override
26
26
  def to_markdown(self) -> str:
27
- validated_marker = self._get_validated_marker()
28
- checkbox_state = self._get_checkbox_state()
29
- result = f"{validated_marker}{checkbox_state} {self.text}"
27
+ # Get the appropriate syntax based on checked state
28
+ if self.checked:
29
+ todo_syntax = self._syntax_registry.get_todo_done_syntax()
30
+ else:
31
+ todo_syntax = self._syntax_registry.get_todo_syntax()
32
+
33
+ result = f"{todo_syntax.start_delimiter} {self.text}"
30
34
  result += self.render_children()
31
35
  return result
32
36
 
@@ -34,5 +38,9 @@ class TodoMarkdownNode(ContainerNode):
34
38
  return self.marker if self.marker == self.VALID_MARKER else self.VALID_MARKER
35
39
 
36
40
  def _get_checkbox_state(self) -> str:
37
- todo_syntax = self._syntax_registry.get_todo_syntax()
38
- return todo_syntax.end_delimiter if self.checked else todo_syntax.start_delimiter
41
+ if self.checked:
42
+ todo_done_syntax = self._syntax_registry.get_todo_done_syntax()
43
+ return todo_done_syntax.start_delimiter
44
+ else:
45
+ todo_syntax = self._syntax_registry.get_todo_syntax()
46
+ return todo_syntax.start_delimiter
@@ -2,7 +2,7 @@ from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
4
  from notionary.page.content.markdown.nodes.container import ContainerNode
5
- from notionary.page.content.syntax import SyntaxRegistry
5
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
6
6
 
7
7
 
8
8
  class ToggleMarkdownNode(ContainerNode):
@@ -10,7 +10,7 @@ class ToggleMarkdownNode(ContainerNode):
10
10
  self,
11
11
  title: str,
12
12
  children: list[MarkdownNode] | None = None,
13
- syntax_registry: SyntaxRegistry | None = None,
13
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
14
14
  ):
15
15
  super().__init__(syntax_registry=syntax_registry)
16
16
  self.title = title
@@ -1,8 +1,10 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.page.content.markdown.nodes.base import MarkdownNode
4
- from notionary.page.content.markdown.nodes.mixins.caption import CaptionMarkdownNodeMixin
5
- from notionary.page.content.syntax import SyntaxRegistry
4
+ from notionary.page.content.markdown.nodes.mixins.caption import (
5
+ CaptionMarkdownNodeMixin,
6
+ )
7
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
6
8
 
7
9
 
8
10
  class VideoMarkdownNode(MarkdownNode, CaptionMarkdownNodeMixin):
@@ -10,7 +12,7 @@ class VideoMarkdownNode(MarkdownNode, CaptionMarkdownNodeMixin):
10
12
  self,
11
13
  url: str,
12
14
  caption: str | None = None,
13
- syntax_registry: SyntaxRegistry | None = None,
15
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
14
16
  ) -> None:
15
17
  super().__init__(syntax_registry=syntax_registry)
16
18
  self.url = url
@@ -19,5 +21,7 @@ class VideoMarkdownNode(MarkdownNode, CaptionMarkdownNodeMixin):
19
21
  @override
20
22
  def to_markdown(self) -> str:
21
23
  video_syntax = self._syntax_registry.get_video_syntax()
22
- base_markdown = f"{video_syntax.start_delimiter}{self.url}{video_syntax.end_delimiter}"
24
+ base_markdown = (
25
+ f"{video_syntax.start_delimiter}{self.url}{video_syntax.end_delimiter}"
26
+ )
23
27
  return self._append_caption_to_markdown(base_markdown, self.caption)
@@ -0,0 +1,73 @@
1
+ from .models import (
2
+ AnyMarkdownNode,
3
+ AudioSchema,
4
+ BookmarkSchema,
5
+ BreadcrumbSchema,
6
+ BulletedListItemSchema,
7
+ BulletedListSchema,
8
+ CalloutSchema,
9
+ CodeSchema,
10
+ ColumnSchema,
11
+ ColumnsSchema,
12
+ DividerSchema,
13
+ EmbedSchema,
14
+ EquationSchema,
15
+ FileSchema,
16
+ Heading1Schema,
17
+ Heading2Schema,
18
+ Heading3Schema,
19
+ ImageSchema,
20
+ MarkdownDocumentSchema,
21
+ MarkdownNodeSchema,
22
+ MermaidSchema,
23
+ NumberedListItemSchema,
24
+ NumberedListSchema,
25
+ ParagraphSchema,
26
+ PdfSchema,
27
+ QuoteSchema,
28
+ SpaceSchema,
29
+ TableOfContentsSchema,
30
+ TableSchema,
31
+ TodoListSchema,
32
+ TodoSchema,
33
+ ToggleSchema,
34
+ VideoSchema,
35
+ )
36
+ from .service import StructuredOutputMarkdownConverter
37
+
38
+ __all__ = [
39
+ "AnyMarkdownNode",
40
+ "AudioSchema",
41
+ "BookmarkSchema",
42
+ "BreadcrumbSchema",
43
+ "BulletedListItemSchema",
44
+ "BulletedListSchema",
45
+ "CalloutSchema",
46
+ "CodeSchema",
47
+ "ColumnSchema",
48
+ "ColumnsSchema",
49
+ "DividerSchema",
50
+ "EmbedSchema",
51
+ "EquationSchema",
52
+ "FileSchema",
53
+ "Heading1Schema",
54
+ "Heading2Schema",
55
+ "Heading3Schema",
56
+ "ImageSchema",
57
+ "MarkdownDocumentSchema",
58
+ "MarkdownNodeSchema",
59
+ "MermaidSchema",
60
+ "NumberedListItemSchema",
61
+ "NumberedListSchema",
62
+ "ParagraphSchema",
63
+ "PdfSchema",
64
+ "QuoteSchema",
65
+ "SpaceSchema",
66
+ "StructuredOutputMarkdownConverter",
67
+ "TableOfContentsSchema",
68
+ "TableSchema",
69
+ "TodoListSchema",
70
+ "TodoSchema",
71
+ "ToggleSchema",
72
+ "VideoSchema",
73
+ ]
@@ -0,0 +1,391 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import StrEnum
4
+ from typing import TYPE_CHECKING, Annotated, Literal
5
+
6
+ from pydantic import BaseModel, ConfigDict, Field
7
+
8
+ from notionary.blocks.enums import CodingLanguage
9
+
10
+ if TYPE_CHECKING:
11
+ from notionary.page.content.markdown.structured_output.service import (
12
+ StructuredOutputMarkdownConverter,
13
+ )
14
+
15
+
16
+ class MarkdownNodeType(StrEnum):
17
+ PARAGRAPH = "paragraph"
18
+ HEADING_1 = "heading_1"
19
+ HEADING_2 = "heading_2"
20
+ HEADING_3 = "heading_3"
21
+ SPACE = "space"
22
+ DIVIDER = "divider"
23
+ QUOTE = "quote"
24
+ BULLETED_LIST = "bulleted_list"
25
+ BULLETED_LIST_ITEM = "bulleted_list_item"
26
+ NUMBERED_LIST = "numbered_list"
27
+ NUMBERED_LIST_ITEM = "numbered_list_item"
28
+ TODO = "todo"
29
+ TODO_LIST = "todo_list"
30
+ CALLOUT = "callout"
31
+ TOGGLE = "toggle"
32
+ IMAGE = "image"
33
+ VIDEO = "video"
34
+ AUDIO = "audio"
35
+ FILE = "file"
36
+ PDF = "pdf"
37
+ BOOKMARK = "bookmark"
38
+ EMBED = "embed"
39
+ CODE = "code"
40
+ MERMAID = "mermaid"
41
+ TABLE = "table"
42
+ BREADCRUMB = "breadcrumb"
43
+ EQUATION = "equation"
44
+ TABLE_OF_CONTENTS = "table_of_contents"
45
+ COLUMNS = "columns"
46
+
47
+
48
+ class MarkdownNodeSchema(BaseModel):
49
+ type: MarkdownNodeType
50
+
51
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
52
+ raise NotImplementedError(
53
+ f"{self.__class__.__name__} must implement process_with()"
54
+ )
55
+
56
+
57
+ class ParagraphSchema(MarkdownNodeSchema):
58
+ type: Literal[MarkdownNodeType.PARAGRAPH] = MarkdownNodeType.PARAGRAPH
59
+ text: str = Field(description="The paragraph text content")
60
+
61
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
62
+ processor._process_paragraph(self)
63
+
64
+
65
+ class Heading1Schema(MarkdownNodeSchema):
66
+ type: Literal["heading_1"] = "heading_1"
67
+ text: str = Field(description="The heading 1 text")
68
+ children: list[MarkdownNodeSchema] | None = Field(
69
+ default=None, description="Optional child nodes"
70
+ )
71
+
72
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
73
+ processor._process_heading_1(self)
74
+
75
+
76
+ class Heading2Schema(MarkdownNodeSchema):
77
+ type: Literal["heading_2"] = "heading_2"
78
+ text: str = Field(description="The heading 2 text")
79
+ children: list[MarkdownNodeSchema] | None = Field(
80
+ default=None, description="Optional child nodes"
81
+ )
82
+
83
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
84
+ processor._process_heading_2(self)
85
+
86
+
87
+ class Heading3Schema(MarkdownNodeSchema):
88
+ type: Literal["heading_3"] = "heading_3"
89
+ text: str = Field(description="The heading 3 text")
90
+ children: list[MarkdownNodeSchema] | None = Field(
91
+ default=None, description="Optional child nodes"
92
+ )
93
+
94
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
95
+ processor._process_heading_3(self)
96
+
97
+
98
+ class SpaceSchema(MarkdownNodeSchema):
99
+ type: Literal[MarkdownNodeType.SPACE] = MarkdownNodeType.SPACE
100
+
101
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
102
+ processor._process_space()
103
+
104
+
105
+ class DividerSchema(MarkdownNodeSchema):
106
+ type: Literal[MarkdownNodeType.DIVIDER] = MarkdownNodeType.DIVIDER
107
+
108
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
109
+ processor._process_divider()
110
+
111
+
112
+ class QuoteSchema(MarkdownNodeSchema):
113
+ type: Literal[MarkdownNodeType.QUOTE] = MarkdownNodeType.QUOTE
114
+ text: str = Field(description="The quote text")
115
+ children: list[MarkdownNodeSchema] | None = Field(
116
+ default=None, description="Optional child nodes"
117
+ )
118
+
119
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
120
+ processor._process_quote(self)
121
+
122
+
123
+ class BulletedListItemSchema(MarkdownNodeSchema):
124
+ type: Literal[MarkdownNodeType.BULLETED_LIST_ITEM] = (
125
+ MarkdownNodeType.BULLETED_LIST_ITEM
126
+ )
127
+ text: str = Field(description="The bullet point text")
128
+ children: list[MarkdownNodeSchema] | None = Field(
129
+ default=None, description="Optional nested content"
130
+ )
131
+
132
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
133
+ processor._process_bulleted_list_item(self)
134
+
135
+
136
+ class BulletedListSchema(MarkdownNodeSchema):
137
+ type: Literal[MarkdownNodeType.BULLETED_LIST] = MarkdownNodeType.BULLETED_LIST
138
+ items: list[BulletedListItemSchema] = Field(
139
+ description="List of BulletedListItemSchema objects. Each item must have 'type', 'text', and optionally 'children'"
140
+ )
141
+
142
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
143
+ processor._process_bulleted_list(self)
144
+
145
+
146
+ class NumberedListItemSchema(MarkdownNodeSchema):
147
+ type: Literal[MarkdownNodeType.NUMBERED_LIST_ITEM] = (
148
+ MarkdownNodeType.NUMBERED_LIST_ITEM
149
+ )
150
+ text: str = Field(description="The numbered item text")
151
+ children: list[MarkdownNodeSchema] | None = Field(
152
+ default=None, description="Optional nested content"
153
+ )
154
+
155
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
156
+ processor._process_numbered_list_item(self)
157
+
158
+
159
+ class NumberedListSchema(MarkdownNodeSchema):
160
+ type: Literal[MarkdownNodeType.NUMBERED_LIST] = MarkdownNodeType.NUMBERED_LIST
161
+ items: list[NumberedListItemSchema] = Field(
162
+ description="List of NumberedListItemSchema objects. Each item must have 'type', 'text', and optionally 'children'"
163
+ )
164
+
165
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
166
+ processor._process_numbered_list(self)
167
+
168
+
169
+ class TodoSchema(MarkdownNodeSchema):
170
+ type: Literal[MarkdownNodeType.TODO] = MarkdownNodeType.TODO
171
+ text: str = Field(description="The todo item text")
172
+ checked: bool = Field(default=False, description="Whether the todo is completed")
173
+ children: list[MarkdownNodeSchema] | None = Field(
174
+ default=None, description="Optional nested content"
175
+ )
176
+
177
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
178
+ processor._process_todo(self)
179
+
180
+
181
+ class TodoListSchema(MarkdownNodeSchema):
182
+ type: Literal[MarkdownNodeType.TODO_LIST] = MarkdownNodeType.TODO_LIST
183
+ items: list[TodoSchema] = Field(
184
+ description="List of TodoSchema objects. Each item must have 'type', 'text', 'checked', and optionally 'children'"
185
+ )
186
+
187
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
188
+ processor._process_todo_list(self)
189
+
190
+
191
+ class CalloutSchema(MarkdownNodeSchema):
192
+ type: Literal[MarkdownNodeType.CALLOUT] = MarkdownNodeType.CALLOUT
193
+ text: str = Field(description="The callout text")
194
+ emoji: str | None = Field(default=None, description="Optional emoji icon")
195
+ children: list[MarkdownNodeSchema] | None = Field(
196
+ default=None, description="Optional child nodes"
197
+ )
198
+
199
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
200
+ processor._process_callout(self)
201
+
202
+
203
+ class ToggleSchema(MarkdownNodeSchema):
204
+ type: Literal[MarkdownNodeType.TOGGLE] = MarkdownNodeType.TOGGLE
205
+ title: str = Field(description="The toggle title")
206
+ children: list[MarkdownNodeSchema] = Field(description="Content inside the toggle")
207
+
208
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
209
+ processor._process_toggle(self)
210
+
211
+
212
+ class ImageSchema(MarkdownNodeSchema):
213
+ type: Literal[MarkdownNodeType.IMAGE] = MarkdownNodeType.IMAGE
214
+ url: str = Field(description="Image URL")
215
+ caption: str | None = Field(default=None, description="Optional caption")
216
+
217
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
218
+ processor._process_image(self)
219
+
220
+
221
+ class VideoSchema(MarkdownNodeSchema):
222
+ type: Literal[MarkdownNodeType.VIDEO] = MarkdownNodeType.VIDEO
223
+ url: str = Field(description="Video URL")
224
+ caption: str | None = Field(default=None, description="Optional caption")
225
+
226
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
227
+ processor._process_video(self)
228
+
229
+
230
+ class AudioSchema(MarkdownNodeSchema):
231
+ type: Literal[MarkdownNodeType.AUDIO] = MarkdownNodeType.AUDIO
232
+ url: str = Field(description="Audio URL")
233
+ caption: str | None = Field(default=None, description="Optional caption")
234
+
235
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
236
+ processor._process_audio(self)
237
+
238
+
239
+ class FileSchema(MarkdownNodeSchema):
240
+ type: Literal[MarkdownNodeType.FILE] = MarkdownNodeType.FILE
241
+ url: str = Field(description="File URL")
242
+ caption: str | None = Field(default=None, description="Optional caption")
243
+
244
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
245
+ processor._process_file(self)
246
+
247
+
248
+ class PdfSchema(MarkdownNodeSchema):
249
+ type: Literal[MarkdownNodeType.PDF] = MarkdownNodeType.PDF
250
+ url: str = Field(description="PDF URL")
251
+ caption: str | None = Field(default=None, description="Optional caption")
252
+
253
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
254
+ processor._process_pdf(self)
255
+
256
+
257
+ class BookmarkSchema(MarkdownNodeSchema):
258
+ type: Literal[MarkdownNodeType.BOOKMARK] = MarkdownNodeType.BOOKMARK
259
+ url: str = Field(description="Bookmark URL")
260
+ title: str | None = Field(default=None, description="Optional title")
261
+ caption: str | None = Field(default=None, description="Optional caption")
262
+
263
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
264
+ processor._process_bookmark(self)
265
+
266
+
267
+ class EmbedSchema(MarkdownNodeSchema):
268
+ type: Literal[MarkdownNodeType.EMBED] = MarkdownNodeType.EMBED
269
+ url: str = Field(description="Embed URL")
270
+ caption: str | None = Field(default=None, description="Optional caption")
271
+
272
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
273
+ processor._process_embed(self)
274
+
275
+
276
+ class CodeSchema(MarkdownNodeSchema):
277
+ type: Literal[MarkdownNodeType.CODE] = MarkdownNodeType.CODE
278
+ code: str = Field(description="Code content")
279
+ language: CodingLanguage | None = Field(
280
+ default=None, description="Programming language"
281
+ )
282
+ caption: str | None = Field(default=None, description="Optional caption")
283
+
284
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
285
+ processor._process_code(self)
286
+
287
+
288
+ class MermaidSchema(MarkdownNodeSchema):
289
+ type: Literal[MarkdownNodeType.MERMAID] = MarkdownNodeType.MERMAID
290
+ diagram: str = Field(description="Mermaid diagram code")
291
+ caption: str | None = Field(default=None, description="Optional caption")
292
+
293
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
294
+ processor._process_mermaid(self)
295
+
296
+
297
+ class TableSchema(MarkdownNodeSchema):
298
+ type: Literal[MarkdownNodeType.TABLE] = MarkdownNodeType.TABLE
299
+ headers: list[str] = Field(description="Table header row")
300
+ rows: list[list[str]] = Field(description="Table data rows")
301
+
302
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
303
+ processor._process_table(self)
304
+
305
+
306
+ class BreadcrumbSchema(MarkdownNodeSchema):
307
+ type: Literal[MarkdownNodeType.BREADCRUMB] = MarkdownNodeType.BREADCRUMB
308
+
309
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
310
+ processor._process_breadcrumb(self)
311
+
312
+
313
+ class EquationSchema(MarkdownNodeSchema):
314
+ type: Literal[MarkdownNodeType.EQUATION] = MarkdownNodeType.EQUATION
315
+ expression: str = Field(description="LaTeX equation expression")
316
+
317
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
318
+ processor._process_equation(self)
319
+
320
+
321
+ class TableOfContentsSchema(MarkdownNodeSchema):
322
+ type: Literal[MarkdownNodeType.TABLE_OF_CONTENTS] = (
323
+ MarkdownNodeType.TABLE_OF_CONTENTS
324
+ )
325
+
326
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
327
+ processor._process_table_of_contents(self)
328
+
329
+
330
+ class ColumnSchema(BaseModel):
331
+ """Single column in a multi-column layout."""
332
+
333
+ width_ratio: float | None = Field(
334
+ default=None,
335
+ description="Relative width of this column (e.g., 0.5 for half width). If not specified, columns are equal width",
336
+ )
337
+ children: list[MarkdownNodeSchema] = Field(
338
+ description="Content inside this column. Can contain any markdown nodes"
339
+ )
340
+
341
+
342
+ class ColumnsSchema(MarkdownNodeSchema):
343
+ type: Literal[MarkdownNodeType.COLUMNS] = MarkdownNodeType.COLUMNS
344
+ columns: list[ColumnSchema] = Field(
345
+ description="List of columns in this layout. Each column contains its own content"
346
+ )
347
+
348
+ def process_with(self, processor: StructuredOutputMarkdownConverter) -> None:
349
+ processor._process_columns(self)
350
+
351
+
352
+ type AnyMarkdownNode = Annotated[
353
+ Heading1Schema
354
+ | Heading2Schema
355
+ | Heading3Schema
356
+ | ParagraphSchema
357
+ | SpaceSchema
358
+ | DividerSchema
359
+ | QuoteSchema
360
+ | BulletedListSchema
361
+ | BulletedListItemSchema
362
+ | NumberedListSchema
363
+ | NumberedListItemSchema
364
+ | TodoSchema
365
+ | TodoListSchema
366
+ | CalloutSchema
367
+ | ToggleSchema
368
+ | ImageSchema
369
+ | VideoSchema
370
+ | AudioSchema
371
+ | FileSchema
372
+ | PdfSchema
373
+ | BookmarkSchema
374
+ | EmbedSchema
375
+ | CodeSchema
376
+ | MermaidSchema
377
+ | TableSchema
378
+ | BreadcrumbSchema
379
+ | EquationSchema
380
+ | TableOfContentsSchema
381
+ | ColumnsSchema,
382
+ Field(discriminator="type"),
383
+ ]
384
+
385
+
386
+ class MarkdownDocumentSchema(BaseModel):
387
+ model_config = ConfigDict(extra="forbid")
388
+
389
+ nodes: list[AnyMarkdownNode] = Field(
390
+ description="Ordered list of top-level markdown nodes in the document. Each node can contain nested children"
391
+ )