notionary 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. notionary/__init__.py +49 -1
  2. notionary/blocks/client.py +37 -11
  3. notionary/blocks/enums.py +0 -6
  4. notionary/blocks/rich_text/markdown_rich_text_converter.py +49 -15
  5. notionary/blocks/rich_text/models.py +13 -4
  6. notionary/blocks/rich_text/name_id_resolver/data_source.py +9 -3
  7. notionary/blocks/rich_text/name_id_resolver/person.py +6 -2
  8. notionary/blocks/rich_text/rich_text_markdown_converter.py +10 -3
  9. notionary/blocks/schemas.py +33 -78
  10. notionary/comments/client.py +19 -6
  11. notionary/comments/factory.py +10 -3
  12. notionary/comments/schemas.py +10 -31
  13. notionary/comments/service.py +12 -4
  14. notionary/data_source/http/data_source_instance_client.py +59 -17
  15. notionary/data_source/properties/schemas.py +156 -115
  16. notionary/data_source/query/builder.py +67 -18
  17. notionary/data_source/query/resolver.py +16 -5
  18. notionary/data_source/query/schema.py +24 -6
  19. notionary/data_source/query/validator.py +18 -6
  20. notionary/data_source/schema/registry.py +31 -12
  21. notionary/data_source/schema/service.py +66 -20
  22. notionary/data_source/schemas.py +2 -2
  23. notionary/data_source/service.py +103 -43
  24. notionary/database/client.py +27 -9
  25. notionary/database/database_metadata_update_client.py +12 -4
  26. notionary/database/schemas.py +2 -2
  27. notionary/database/service.py +14 -9
  28. notionary/exceptions/__init__.py +20 -4
  29. notionary/exceptions/api.py +2 -2
  30. notionary/exceptions/base.py +1 -1
  31. notionary/exceptions/block_parsing.py +9 -5
  32. notionary/exceptions/data_source/builder.py +13 -7
  33. notionary/exceptions/data_source/properties.py +6 -4
  34. notionary/exceptions/file_upload.py +76 -0
  35. notionary/exceptions/properties.py +7 -5
  36. notionary/exceptions/search.py +10 -6
  37. notionary/file_upload/__init__.py +4 -0
  38. notionary/file_upload/client.py +128 -210
  39. notionary/file_upload/config/__init__.py +17 -0
  40. notionary/file_upload/config/config.py +39 -0
  41. notionary/file_upload/config/constants.py +16 -0
  42. notionary/file_upload/file/reader.py +28 -0
  43. notionary/file_upload/query/__init__.py +7 -0
  44. notionary/file_upload/query/builder.py +58 -0
  45. notionary/file_upload/query/models.py +37 -0
  46. notionary/file_upload/schemas.py +80 -0
  47. notionary/file_upload/service.py +182 -291
  48. notionary/file_upload/validation/factory.py +66 -0
  49. notionary/file_upload/validation/impl/file_name_length.py +25 -0
  50. notionary/file_upload/validation/models.py +134 -0
  51. notionary/file_upload/validation/port.py +7 -0
  52. notionary/file_upload/validation/service.py +17 -0
  53. notionary/file_upload/validation/validators/__init__.py +11 -0
  54. notionary/file_upload/validation/validators/file_exists.py +15 -0
  55. notionary/file_upload/validation/validators/file_extension.py +131 -0
  56. notionary/file_upload/validation/validators/file_name_length.py +21 -0
  57. notionary/file_upload/validation/validators/upload_limit.py +31 -0
  58. notionary/http/client.py +33 -30
  59. notionary/page/content/__init__.py +9 -0
  60. notionary/page/content/factory.py +21 -7
  61. notionary/page/content/markdown/builder.py +85 -23
  62. notionary/page/content/markdown/nodes/audio.py +8 -4
  63. notionary/page/content/markdown/nodes/base.py +3 -3
  64. notionary/page/content/markdown/nodes/bookmark.py +5 -3
  65. notionary/page/content/markdown/nodes/breadcrumb.py +2 -2
  66. notionary/page/content/markdown/nodes/bulleted_list.py +5 -3
  67. notionary/page/content/markdown/nodes/callout.py +2 -2
  68. notionary/page/content/markdown/nodes/code.py +5 -3
  69. notionary/page/content/markdown/nodes/columns.py +3 -3
  70. notionary/page/content/markdown/nodes/container.py +9 -5
  71. notionary/page/content/markdown/nodes/divider.py +2 -2
  72. notionary/page/content/markdown/nodes/embed.py +8 -4
  73. notionary/page/content/markdown/nodes/equation.py +4 -2
  74. notionary/page/content/markdown/nodes/file.py +8 -4
  75. notionary/page/content/markdown/nodes/heading.py +2 -2
  76. notionary/page/content/markdown/nodes/image.py +8 -4
  77. notionary/page/content/markdown/nodes/mixins/caption.py +5 -3
  78. notionary/page/content/markdown/nodes/numbered_list.py +5 -3
  79. notionary/page/content/markdown/nodes/paragraph.py +4 -2
  80. notionary/page/content/markdown/nodes/pdf.py +8 -4
  81. notionary/page/content/markdown/nodes/quote.py +2 -2
  82. notionary/page/content/markdown/nodes/space.py +2 -2
  83. notionary/page/content/markdown/nodes/table.py +8 -5
  84. notionary/page/content/markdown/nodes/table_of_contents.py +2 -2
  85. notionary/page/content/markdown/nodes/todo.py +15 -7
  86. notionary/page/content/markdown/nodes/toggle.py +2 -2
  87. notionary/page/content/markdown/nodes/video.py +8 -4
  88. notionary/page/content/markdown/structured_output/__init__.py +73 -0
  89. notionary/page/content/markdown/structured_output/models.py +391 -0
  90. notionary/page/content/markdown/structured_output/service.py +211 -0
  91. notionary/page/content/parser/context.py +1 -1
  92. notionary/page/content/parser/factory.py +26 -8
  93. notionary/page/content/parser/parsers/audio.py +12 -32
  94. notionary/page/content/parser/parsers/base.py +2 -2
  95. notionary/page/content/parser/parsers/bookmark.py +2 -2
  96. notionary/page/content/parser/parsers/breadcrumb.py +2 -2
  97. notionary/page/content/parser/parsers/bulleted_list.py +19 -6
  98. notionary/page/content/parser/parsers/callout.py +15 -5
  99. notionary/page/content/parser/parsers/caption.py +9 -3
  100. notionary/page/content/parser/parsers/code.py +21 -7
  101. notionary/page/content/parser/parsers/column.py +8 -4
  102. notionary/page/content/parser/parsers/column_list.py +19 -7
  103. notionary/page/content/parser/parsers/divider.py +2 -2
  104. notionary/page/content/parser/parsers/embed.py +2 -4
  105. notionary/page/content/parser/parsers/equation.py +8 -4
  106. notionary/page/content/parser/parsers/file.py +12 -34
  107. notionary/page/content/parser/parsers/file_like_block.py +109 -0
  108. notionary/page/content/parser/parsers/heading.py +31 -10
  109. notionary/page/content/parser/parsers/image.py +12 -34
  110. notionary/page/content/parser/parsers/numbered_list.py +18 -6
  111. notionary/page/content/parser/parsers/paragraph.py +3 -1
  112. notionary/page/content/parser/parsers/pdf.py +12 -34
  113. notionary/page/content/parser/parsers/quote.py +28 -9
  114. notionary/page/content/parser/parsers/space.py +2 -2
  115. notionary/page/content/parser/parsers/table.py +31 -10
  116. notionary/page/content/parser/parsers/table_of_contents.py +7 -3
  117. notionary/page/content/parser/parsers/todo.py +15 -5
  118. notionary/page/content/parser/parsers/toggle.py +15 -5
  119. notionary/page/content/parser/parsers/video.py +12 -34
  120. notionary/page/content/parser/post_processing/handlers/rich_text_length.py +8 -2
  121. notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +8 -2
  122. notionary/page/content/parser/post_processing/service.py +3 -1
  123. notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +21 -7
  124. notionary/page/content/parser/pre_processsing/handlers/indentation.py +11 -4
  125. notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +13 -6
  126. notionary/page/content/parser/service.py +4 -1
  127. notionary/page/content/renderer/context.py +15 -5
  128. notionary/page/content/renderer/factory.py +12 -6
  129. notionary/page/content/renderer/post_processing/handlers/numbered_list.py +19 -9
  130. notionary/page/content/renderer/renderers/audio.py +20 -23
  131. notionary/page/content/renderer/renderers/base.py +3 -3
  132. notionary/page/content/renderer/renderers/bookmark.py +3 -1
  133. notionary/page/content/renderer/renderers/bulleted_list.py +11 -5
  134. notionary/page/content/renderer/renderers/callout.py +19 -7
  135. notionary/page/content/renderer/renderers/captioned_block.py +11 -5
  136. notionary/page/content/renderer/renderers/code.py +6 -2
  137. notionary/page/content/renderer/renderers/column.py +3 -1
  138. notionary/page/content/renderer/renderers/column_list.py +3 -1
  139. notionary/page/content/renderer/renderers/embed.py +3 -1
  140. notionary/page/content/renderer/renderers/equation.py +3 -1
  141. notionary/page/content/renderer/renderers/file.py +20 -23
  142. notionary/page/content/renderer/renderers/file_like_block.py +47 -0
  143. notionary/page/content/renderer/renderers/heading.py +22 -8
  144. notionary/page/content/renderer/renderers/image.py +20 -23
  145. notionary/page/content/renderer/renderers/numbered_list.py +8 -3
  146. notionary/page/content/renderer/renderers/paragraph.py +12 -4
  147. notionary/page/content/renderer/renderers/pdf.py +20 -23
  148. notionary/page/content/renderer/renderers/quote.py +14 -6
  149. notionary/page/content/renderer/renderers/table.py +15 -5
  150. notionary/page/content/renderer/renderers/todo.py +16 -6
  151. notionary/page/content/renderer/renderers/toggle.py +8 -4
  152. notionary/page/content/renderer/renderers/video.py +20 -23
  153. notionary/page/content/renderer/service.py +9 -3
  154. notionary/page/content/service.py +21 -7
  155. notionary/page/content/syntax/definition/__init__.py +11 -0
  156. notionary/page/content/syntax/definition/models.py +57 -0
  157. notionary/page/content/syntax/definition/registry.py +371 -0
  158. notionary/page/content/syntax/prompts/__init__.py +4 -0
  159. notionary/page/content/syntax/prompts/models.py +11 -0
  160. notionary/page/content/syntax/prompts/registry.py +703 -0
  161. notionary/page/page_metadata_update_client.py +12 -4
  162. notionary/page/properties/client.py +46 -16
  163. notionary/page/properties/factory.py +6 -2
  164. notionary/page/properties/{models.py → schemas.py} +93 -107
  165. notionary/page/properties/service.py +111 -37
  166. notionary/page/schemas.py +3 -3
  167. notionary/page/service.py +21 -7
  168. notionary/shared/entity/client.py +6 -2
  169. notionary/shared/entity/dto_parsers.py +4 -37
  170. notionary/shared/entity/entity_metadata_update_client.py +25 -5
  171. notionary/shared/entity/schemas.py +6 -6
  172. notionary/shared/entity/service.py +89 -35
  173. notionary/shared/models/file.py +36 -6
  174. notionary/shared/models/icon.py +5 -12
  175. notionary/user/base.py +6 -2
  176. notionary/user/bot.py +22 -14
  177. notionary/user/client.py +3 -1
  178. notionary/user/person.py +3 -1
  179. notionary/user/schemas.py +3 -1
  180. notionary/user/service.py +6 -2
  181. notionary/utils/decorators.py +13 -9
  182. notionary/utils/fuzzy.py +6 -2
  183. notionary/utils/mixins/logging.py +3 -1
  184. notionary/utils/pagination.py +14 -4
  185. notionary/workspace/__init__.py +6 -2
  186. notionary/workspace/query/__init__.py +2 -1
  187. notionary/workspace/query/service.py +42 -13
  188. notionary/workspace/service.py +74 -46
  189. {notionary-0.3.1.dist-info → notionary-0.4.1.dist-info}/METADATA +1 -1
  190. notionary-0.4.1.dist-info/RECORD +236 -0
  191. notionary/file_upload/models.py +0 -69
  192. notionary/page/blocks/client.py +0 -1
  193. notionary/page/content/syntax/__init__.py +0 -4
  194. notionary/page/content/syntax/models.py +0 -66
  195. notionary/page/content/syntax/registry.py +0 -393
  196. notionary/page/page_context.py +0 -50
  197. notionary/shared/models/cover.py +0 -20
  198. notionary-0.3.1.dist-info/RECORD +0 -211
  199. /notionary/page/content/syntax/{grammar.py → definition/grammar.py} +0 -0
  200. {notionary-0.3.1.dist-info → notionary-0.4.1.dist-info}/WHEEL +0 -0
  201. {notionary-0.3.1.dist-info → notionary-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,21 +1,25 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.blocks.enums import BlockType
4
- from notionary.blocks.rich_text.rich_text_markdown_converter import RichTextToMarkdownConverter
4
+ from notionary.blocks.rich_text.rich_text_markdown_converter import (
5
+ RichTextToMarkdownConverter,
6
+ )
5
7
  from notionary.blocks.schemas import Block
6
8
  from notionary.page.content.renderer.context import MarkdownRenderingContext
7
9
  from notionary.page.content.renderer.renderers.base import BlockRenderer
8
- from notionary.page.content.syntax import SyntaxRegistry
10
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
9
11
 
10
12
 
11
13
  class TodoRenderer(BlockRenderer):
12
14
  def __init__(
13
15
  self,
14
- syntax_registry: SyntaxRegistry | None = None,
16
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
15
17
  rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
16
18
  ) -> None:
17
19
  super().__init__(syntax_registry=syntax_registry)
18
- self._rich_text_markdown_converter = rich_text_markdown_converter or RichTextToMarkdownConverter()
20
+ self._rich_text_markdown_converter = (
21
+ rich_text_markdown_converter or RichTextToMarkdownConverter()
22
+ )
19
23
 
20
24
  @override
21
25
  def _can_handle(self, block: Block) -> bool:
@@ -29,7 +33,11 @@ class TodoRenderer(BlockRenderer):
29
33
  context.markdown_result = ""
30
34
  return
31
35
 
32
- syntax = self._syntax_registry.get_todo_done_syntax() if is_checked else self._syntax_registry.get_todo_syntax()
36
+ syntax = (
37
+ self._syntax_registry.get_todo_done_syntax()
38
+ if is_checked
39
+ else self._syntax_registry.get_todo_syntax()
40
+ )
33
41
 
34
42
  todo_markdown = f"{syntax.start_delimiter} {content}"
35
43
 
@@ -51,6 +59,8 @@ class TodoRenderer(BlockRenderer):
51
59
 
52
60
  content = ""
53
61
  if block.to_do.rich_text:
54
- content = await self._rich_text_markdown_converter.to_markdown(block.to_do.rich_text)
62
+ content = await self._rich_text_markdown_converter.to_markdown(
63
+ block.to_do.rich_text
64
+ )
55
65
 
56
66
  return is_checked, content
@@ -1,21 +1,25 @@
1
1
  from typing import override
2
2
 
3
3
  from notionary.blocks.enums import BlockType
4
- from notionary.blocks.rich_text.rich_text_markdown_converter import RichTextToMarkdownConverter
4
+ from notionary.blocks.rich_text.rich_text_markdown_converter import (
5
+ RichTextToMarkdownConverter,
6
+ )
5
7
  from notionary.blocks.schemas import Block
6
8
  from notionary.page.content.renderer.context import MarkdownRenderingContext
7
9
  from notionary.page.content.renderer.renderers.base import BlockRenderer
8
- from notionary.page.content.syntax import SyntaxRegistry
10
+ from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
9
11
 
10
12
 
11
13
  class ToggleRenderer(BlockRenderer):
12
14
  def __init__(
13
15
  self,
14
- syntax_registry: SyntaxRegistry | None = None,
16
+ syntax_registry: SyntaxDefinitionRegistry | None = None,
15
17
  rich_text_markdown_converter: RichTextToMarkdownConverter | None = None,
16
18
  ) -> None:
17
19
  super().__init__(syntax_registry=syntax_registry)
18
- self._rich_text_markdown_converter = rich_text_markdown_converter or RichTextToMarkdownConverter()
20
+ self._rich_text_markdown_converter = (
21
+ rich_text_markdown_converter or RichTextToMarkdownConverter()
22
+ )
19
23
 
20
24
  @override
21
25
  def _can_handle(self, block: Block) -> bool:
@@ -1,31 +1,28 @@
1
1
  from typing import override
2
2
 
3
- from notionary.blocks.schemas import Block, BlockType
4
- from notionary.page.content.renderer.renderers.captioned_block import CaptionedBlockRenderer
5
-
6
-
7
- class VideoRenderer(CaptionedBlockRenderer):
3
+ from notionary.blocks.schemas import (
4
+ Block,
5
+ BlockType,
6
+ ExternalFileWithCaption,
7
+ NotionHostedFileWithCaption,
8
+ )
9
+ from notionary.page.content.renderer.renderers.file_like_block import (
10
+ FileLikeBlockRenderer,
11
+ )
12
+ from notionary.page.content.syntax.definition import EnclosedSyntaxDefinition
13
+
14
+
15
+ class VideoRenderer(FileLikeBlockRenderer):
8
16
  @override
9
17
  def _can_handle(self, block: Block) -> bool:
10
18
  return block.type == BlockType.VIDEO
11
19
 
12
20
  @override
13
- async def _render_main_content(self, block: Block) -> str:
14
- url = self._extract_video_url(block)
15
-
16
- if not url:
17
- return ""
18
-
19
- syntax = self._syntax_registry.get_video_syntax()
20
- return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
21
+ def _get_syntax(self) -> EnclosedSyntaxDefinition:
22
+ return self._syntax_registry.get_video_syntax()
21
23
 
22
- def _extract_video_url(self, block: Block) -> str:
23
- if not block.video:
24
- return ""
25
-
26
- if block.video.external:
27
- return block.video.external.url or ""
28
- elif block.video.file:
29
- return block.video.file.url or ""
30
-
31
- return ""
24
+ @override
25
+ def _get_file_data(
26
+ self, block: Block
27
+ ) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
28
+ return block.video
@@ -1,6 +1,8 @@
1
1
  from notionary.blocks.schemas import Block
2
2
  from notionary.page.content.renderer.context import MarkdownRenderingContext
3
- from notionary.page.content.renderer.post_processing.service import MarkdownRenderingPostProcessor
3
+ from notionary.page.content.renderer.post_processing.service import (
4
+ MarkdownRenderingPostProcessor,
5
+ )
4
6
  from notionary.page.content.renderer.renderers import BlockRenderer
5
7
  from notionary.utils.mixins.logging import LoggingMixin
6
8
 
@@ -22,7 +24,9 @@ class NotionToMarkdownConverter(LoggingMixin):
22
24
  current_block_index = 0
23
25
 
24
26
  while current_block_index < len(blocks):
25
- context = self._create_rendering_context(blocks, current_block_index, indent_level)
27
+ context = self._create_rendering_context(
28
+ blocks, current_block_index, indent_level
29
+ )
26
30
  await self._renderer_chain.handle(context)
27
31
 
28
32
  if context.markdown_result:
@@ -45,6 +49,8 @@ class NotionToMarkdownConverter(LoggingMixin):
45
49
  convert_children_callback=self.convert,
46
50
  )
47
51
 
48
- def _join_rendered_blocks(self, rendered_parts: list[str], indent_level: int) -> str:
52
+ def _join_rendered_blocks(
53
+ self, rendered_parts: list[str], indent_level: int
54
+ ) -> str:
49
55
  separator = "\n\n" if indent_level == 0 else "\n"
50
56
  return separator.join(rendered_parts)
@@ -30,13 +30,17 @@ class PageContentService(LoggingMixin):
30
30
 
31
31
  @time_execution_async()
32
32
  async def clear(self) -> None:
33
- children_response = await self._block_client.get_block_children(block_id=self._page_id)
33
+ children_response = await self._block_client.get_block_children(
34
+ block_id=self._page_id
35
+ )
34
36
 
35
37
  if not children_response or not children_response.results:
36
38
  self.logger.debug("No blocks to delete for page: %s", self._page_id)
37
39
  return
38
40
 
39
- await asyncio.gather(*[self._delete_single_block(block) for block in children_response.results])
41
+ await asyncio.gather(
42
+ *[self._delete_single_block(block) for block in children_response.results]
43
+ )
40
44
 
41
45
  @async_retry(max_retries=10, initial_delay=0.2, backoff_factor=1.5)
42
46
  async def _delete_single_block(self, block: Block) -> None:
@@ -44,16 +48,22 @@ class PageContentService(LoggingMixin):
44
48
  await self._block_client.delete_block(block.id)
45
49
 
46
50
  @time_execution_async()
47
- async def append_markdown(self, content: str | Callable[[MarkdownBuilder], MarkdownBuilder]) -> None:
51
+ async def append_markdown(
52
+ self, content: str | Callable[[MarkdownBuilder], MarkdownBuilder]
53
+ ) -> None:
48
54
  markdown = self._extract_markdown(content)
49
55
  if not markdown:
50
- self.logger.debug("No markdown content to append for page: %s", self._page_id)
56
+ self.logger.debug(
57
+ "No markdown content to append for page: %s", self._page_id
58
+ )
51
59
  return
52
60
 
53
61
  blocks = await self._markdown_converter.convert(markdown)
54
62
  await self._append_blocks(blocks)
55
63
 
56
- def _extract_markdown(self, content: str | Callable[[MarkdownBuilder], MarkdownBuilder]) -> str:
64
+ def _extract_markdown(
65
+ self, content: str | Callable[[MarkdownBuilder], MarkdownBuilder]
66
+ ) -> str:
57
67
  if isinstance(content, str):
58
68
  return content
59
69
 
@@ -62,7 +72,11 @@ class PageContentService(LoggingMixin):
62
72
  content(builder)
63
73
  return builder.build()
64
74
 
65
- raise ValueError("content must be either a string or a callable that takes a MarkdownBuilder")
75
+ raise ValueError(
76
+ "content must be either a string or a callable that takes a MarkdownBuilder"
77
+ )
66
78
 
67
79
  async def _append_blocks(self, blocks: list[Block]) -> None:
68
- await self._block_client.append_block_children(block_id=self._page_id, children=blocks)
80
+ await self._block_client.append_block_children(
81
+ block_id=self._page_id, children=blocks
82
+ )
@@ -0,0 +1,11 @@
1
+ from .grammar import MarkdownGrammar
2
+ from .models import EnclosedSyntaxDefinition, SimpleSyntaxDefinition, SyntaxDefinition
3
+ from .registry import SyntaxDefinitionRegistry
4
+
5
+ __all__ = [
6
+ "EnclosedSyntaxDefinition",
7
+ "MarkdownGrammar",
8
+ "SimpleSyntaxDefinition",
9
+ "SyntaxDefinition",
10
+ "SyntaxDefinitionRegistry",
11
+ ]
@@ -0,0 +1,57 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from enum import StrEnum
4
+
5
+
6
+ class SyntaxDefinitionRegistryKey(StrEnum):
7
+ AUDIO = "audio"
8
+ BOOKMARK = "bookmark"
9
+ IMAGE = "image"
10
+ VIDEO = "video"
11
+ FILE = "file"
12
+ PDF = "pdf"
13
+
14
+ BULLETED_LIST = "bulleted_list"
15
+ NUMBERED_LIST = "numbered_list"
16
+ TO_DO = "todo"
17
+ TO_DO_DONE = "todo_done"
18
+
19
+ TOGGLE = "toggle"
20
+ TOGGLEABLE_HEADING = "toggleable_heading"
21
+ CALLOUT = "callout"
22
+ QUOTE = "quote"
23
+ CODE = "code"
24
+
25
+ COLUMN_LIST = "column_list"
26
+ COLUMN = "column"
27
+
28
+ HEADING = "heading"
29
+
30
+ DIVIDER = "divider"
31
+ BREADCRUMB = "breadcrumb"
32
+ TABLE_OF_CONTENTS = "table_of_contents"
33
+ EQUATION = "equation"
34
+ EMBED = "embed"
35
+ TABLE = "table"
36
+ TABLE_ROW = "table_row"
37
+
38
+ CAPTION = "caption"
39
+ SPACE = "space"
40
+ PARAGRAPH = "paragraph"
41
+
42
+
43
+ @dataclass(frozen=True)
44
+ class SimpleSyntaxDefinition:
45
+ start_delimiter: str
46
+ regex_pattern: re.Pattern
47
+
48
+
49
+ @dataclass(frozen=True)
50
+ class EnclosedSyntaxDefinition:
51
+ start_delimiter: str
52
+ end_delimiter: str
53
+ regex_pattern: re.Pattern
54
+ end_regex_pattern: re.Pattern
55
+
56
+
57
+ type SyntaxDefinition = SimpleSyntaxDefinition | EnclosedSyntaxDefinition
@@ -0,0 +1,371 @@
1
+ import re
2
+
3
+ from notionary.page.content.syntax.definition.grammar import MarkdownGrammar
4
+ from notionary.page.content.syntax.definition.models import (
5
+ EnclosedSyntaxDefinition,
6
+ SimpleSyntaxDefinition,
7
+ SyntaxDefinition,
8
+ SyntaxDefinitionRegistryKey,
9
+ )
10
+
11
+
12
+ class SyntaxDefinitionRegistry:
13
+ def __init__(
14
+ self, markdown_markdown_grammar: MarkdownGrammar | None = None
15
+ ) -> None:
16
+ self._markdown_grammar = markdown_markdown_grammar or MarkdownGrammar()
17
+ self._definitions: dict[SyntaxDefinitionRegistryKey, SyntaxDefinition] = {}
18
+ self._register_defaults()
19
+
20
+ def get_breadcrumb_syntax(self) -> SimpleSyntaxDefinition:
21
+ return self._definitions[SyntaxDefinitionRegistryKey.BREADCRUMB]
22
+
23
+ def get_bulleted_list_syntax(self) -> SimpleSyntaxDefinition:
24
+ return self._definitions[SyntaxDefinitionRegistryKey.BULLETED_LIST]
25
+
26
+ def get_divider_syntax(self) -> SimpleSyntaxDefinition:
27
+ return self._definitions[SyntaxDefinitionRegistryKey.DIVIDER]
28
+
29
+ def get_numbered_list_syntax(self) -> SimpleSyntaxDefinition:
30
+ return self._definitions[SyntaxDefinitionRegistryKey.NUMBERED_LIST]
31
+
32
+ def get_quote_syntax(self) -> SimpleSyntaxDefinition:
33
+ return self._definitions[SyntaxDefinitionRegistryKey.QUOTE]
34
+
35
+ def get_table_syntax(self) -> SimpleSyntaxDefinition:
36
+ return self._definitions[SyntaxDefinitionRegistryKey.TABLE]
37
+
38
+ def get_table_row_syntax(self) -> SimpleSyntaxDefinition:
39
+ return self._definitions[SyntaxDefinitionRegistryKey.TABLE_ROW]
40
+
41
+ def get_table_of_contents_syntax(self) -> SimpleSyntaxDefinition:
42
+ return self._definitions[SyntaxDefinitionRegistryKey.TABLE_OF_CONTENTS]
43
+
44
+ def get_todo_syntax(self) -> SimpleSyntaxDefinition:
45
+ return self._definitions[SyntaxDefinitionRegistryKey.TO_DO]
46
+
47
+ def get_todo_done_syntax(self) -> SimpleSyntaxDefinition:
48
+ return self._definitions[SyntaxDefinitionRegistryKey.TO_DO_DONE]
49
+
50
+ def get_caption_syntax(self) -> SimpleSyntaxDefinition:
51
+ return self._definitions[SyntaxDefinitionRegistryKey.CAPTION]
52
+
53
+ def get_space_syntax(self) -> SimpleSyntaxDefinition:
54
+ return self._definitions[SyntaxDefinitionRegistryKey.SPACE]
55
+
56
+ def get_heading_syntax(self) -> SimpleSyntaxDefinition:
57
+ return self._definitions[SyntaxDefinitionRegistryKey.HEADING]
58
+
59
+ def get_audio_syntax(self) -> EnclosedSyntaxDefinition:
60
+ return self._definitions[SyntaxDefinitionRegistryKey.AUDIO]
61
+
62
+ def get_bookmark_syntax(self) -> EnclosedSyntaxDefinition:
63
+ return self._definitions[SyntaxDefinitionRegistryKey.BOOKMARK]
64
+
65
+ def get_embed_syntax(self) -> EnclosedSyntaxDefinition:
66
+ return self._definitions[SyntaxDefinitionRegistryKey.EMBED]
67
+
68
+ def get_file_syntax(self) -> EnclosedSyntaxDefinition:
69
+ return self._definitions[SyntaxDefinitionRegistryKey.FILE]
70
+
71
+ def get_image_syntax(self) -> EnclosedSyntaxDefinition:
72
+ return self._definitions[SyntaxDefinitionRegistryKey.IMAGE]
73
+
74
+ def get_pdf_syntax(self) -> EnclosedSyntaxDefinition:
75
+ return self._definitions[SyntaxDefinitionRegistryKey.PDF]
76
+
77
+ def get_video_syntax(self) -> EnclosedSyntaxDefinition:
78
+ return self._definitions[SyntaxDefinitionRegistryKey.VIDEO]
79
+
80
+ def get_callout_syntax(self) -> EnclosedSyntaxDefinition:
81
+ return self._definitions[SyntaxDefinitionRegistryKey.CALLOUT]
82
+
83
+ def get_code_syntax(self) -> EnclosedSyntaxDefinition:
84
+ return self._definitions[SyntaxDefinitionRegistryKey.CODE]
85
+
86
+ def get_column_syntax(self) -> EnclosedSyntaxDefinition:
87
+ return self._definitions[SyntaxDefinitionRegistryKey.COLUMN]
88
+
89
+ def get_column_list_syntax(self) -> EnclosedSyntaxDefinition:
90
+ return self._definitions[SyntaxDefinitionRegistryKey.COLUMN_LIST]
91
+
92
+ def get_equation_syntax(self) -> EnclosedSyntaxDefinition:
93
+ return self._definitions[SyntaxDefinitionRegistryKey.EQUATION]
94
+
95
+ def get_toggle_syntax(self) -> EnclosedSyntaxDefinition:
96
+ return self._definitions[SyntaxDefinitionRegistryKey.TOGGLE]
97
+
98
+ def get_toggleable_heading_syntax(self) -> EnclosedSyntaxDefinition:
99
+ return self._definitions[SyntaxDefinitionRegistryKey.TOGGLEABLE_HEADING]
100
+
101
+ def _create_media_syntax(
102
+ self, media_type: str, url_pattern: str | None = None
103
+ ) -> EnclosedSyntaxDefinition:
104
+ url_pattern = url_pattern or "[^)]+"
105
+ return EnclosedSyntaxDefinition(
106
+ start_delimiter=f"[{media_type}](",
107
+ end_delimiter=")",
108
+ regex_pattern=re.compile(
109
+ rf"(?<!\!)\[{re.escape(media_type)}\]\(({url_pattern})\)"
110
+ ),
111
+ end_regex_pattern=re.compile(r"\)"),
112
+ )
113
+
114
+ def _create_url_media_syntax(self, media_type: str) -> EnclosedSyntaxDefinition:
115
+ return EnclosedSyntaxDefinition(
116
+ start_delimiter=f"[{media_type}](",
117
+ end_delimiter=")",
118
+ regex_pattern=re.compile(
119
+ rf"(?<!\!)\[{re.escape(media_type)}\]\((https?://[^\s)]+)\)"
120
+ ),
121
+ end_regex_pattern=re.compile(r"\)"),
122
+ )
123
+
124
+ def _register_defaults(self) -> None:
125
+ # Media elements (enclosed)
126
+ self._register_audio_syntax()
127
+ self._register_video_syntax()
128
+ self._register_image_syntax()
129
+ self._register_file_syntax()
130
+ self._register_pdf_syntax()
131
+ self._register_bookmark_syntax()
132
+ self._register_embed_syntax()
133
+
134
+ # Lists (simple)
135
+ self._register_bulleted_list_syntax()
136
+ self._register_numbered_list_syntax()
137
+ self._register_todo_syntax()
138
+ self._register_todo_done_syntax()
139
+
140
+ # Block containers (enclosed)
141
+ self._register_toggle_syntax()
142
+ self._register_toggleable_heading_syntax()
143
+ self._register_callout_syntax()
144
+ self._register_code_syntax()
145
+ self._register_column_list_syntax()
146
+ self._register_column_syntax()
147
+ self._register_equation_syntax()
148
+
149
+ # Text blocks (simple)
150
+ self._register_quote_syntax()
151
+ self._register_heading_syntax()
152
+ self._register_divider_syntax()
153
+ self._register_breadcrumb_syntax()
154
+ self._register_table_of_contents_syntax()
155
+ self._register_table_syntax()
156
+ self._register_table_row_syntax()
157
+ self._register_caption_syntax()
158
+ self._register_space_syntax()
159
+
160
+ # Registration methods - SimpleSyntaxDefinition
161
+ def _register_breadcrumb_syntax(self) -> None:
162
+ definition = SimpleSyntaxDefinition(
163
+ start_delimiter="[breadcrumb]",
164
+ regex_pattern=re.compile(r"^\[breadcrumb\]\s*$", re.IGNORECASE),
165
+ )
166
+ self._definitions[SyntaxDefinitionRegistryKey.BREADCRUMB] = definition
167
+
168
+ def _register_bulleted_list_syntax(self) -> None:
169
+ definition = SimpleSyntaxDefinition(
170
+ start_delimiter="- ",
171
+ regex_pattern=re.compile(r"^(\s*)-\s+(?!\[[ xX]\])(.+)$"),
172
+ )
173
+ self._definitions[SyntaxDefinitionRegistryKey.BULLETED_LIST] = definition
174
+
175
+ def _register_divider_syntax(self) -> None:
176
+ definition = SimpleSyntaxDefinition(
177
+ start_delimiter="---",
178
+ regex_pattern=re.compile(r"^\s*-{3,}\s*$"),
179
+ )
180
+ self._definitions[SyntaxDefinitionRegistryKey.DIVIDER] = definition
181
+
182
+ def _register_numbered_list_syntax(self) -> None:
183
+ definition = SimpleSyntaxDefinition(
184
+ start_delimiter="1. ",
185
+ regex_pattern=re.compile(r"^(\s*)(\d+)\.\s+(.+)$"),
186
+ )
187
+ self._definitions[SyntaxDefinitionRegistryKey.NUMBERED_LIST] = definition
188
+
189
+ def _register_quote_syntax(self) -> None:
190
+ definition = SimpleSyntaxDefinition(
191
+ start_delimiter="> ",
192
+ regex_pattern=re.compile(r"^>(?!>)\s*(.+)$"),
193
+ )
194
+ self._definitions[SyntaxDefinitionRegistryKey.QUOTE] = definition
195
+
196
+ def _register_table_syntax(self) -> None:
197
+ delimiter = self._markdown_grammar.table_delimiter
198
+ definition = SimpleSyntaxDefinition(
199
+ start_delimiter=delimiter,
200
+ regex_pattern=re.compile(
201
+ rf"^\s*{re.escape(delimiter)}(.+){re.escape(delimiter)}\s*$"
202
+ ),
203
+ )
204
+ self._definitions[SyntaxDefinitionRegistryKey.TABLE] = definition
205
+
206
+ def _register_table_row_syntax(self) -> None:
207
+ delimiter = self._markdown_grammar.table_delimiter
208
+ definition = SimpleSyntaxDefinition(
209
+ start_delimiter=delimiter,
210
+ regex_pattern=re.compile(
211
+ rf"^\s*{re.escape(delimiter)}([\s\-:|]+){re.escape(delimiter)}\s*$"
212
+ ),
213
+ )
214
+ self._definitions[SyntaxDefinitionRegistryKey.TABLE_ROW] = definition
215
+
216
+ def _register_table_of_contents_syntax(self) -> None:
217
+ definition = SimpleSyntaxDefinition(
218
+ start_delimiter="[toc]",
219
+ regex_pattern=re.compile(r"^\[toc\]$", re.IGNORECASE),
220
+ )
221
+ self._definitions[SyntaxDefinitionRegistryKey.TABLE_OF_CONTENTS] = definition
222
+
223
+ def _register_todo_syntax(self) -> None:
224
+ definition = SimpleSyntaxDefinition(
225
+ start_delimiter="- [ ]",
226
+ regex_pattern=re.compile(r"^\s*-\s+\[ \]\s+(.+)$"),
227
+ )
228
+ self._definitions[SyntaxDefinitionRegistryKey.TO_DO] = definition
229
+
230
+ def _register_todo_done_syntax(self) -> None:
231
+ definition = SimpleSyntaxDefinition(
232
+ start_delimiter="- [x]",
233
+ regex_pattern=re.compile(r"^\s*-\s+\[x\]\s+(.+)$", re.IGNORECASE),
234
+ )
235
+ self._definitions[SyntaxDefinitionRegistryKey.TO_DO_DONE] = definition
236
+
237
+ def _register_caption_syntax(self) -> None:
238
+ definition = SimpleSyntaxDefinition(
239
+ start_delimiter="[caption]",
240
+ regex_pattern=re.compile(r"^\[caption\]\s+(\S.*)$"),
241
+ )
242
+ self._definitions[SyntaxDefinitionRegistryKey.CAPTION] = definition
243
+
244
+ def _register_space_syntax(self) -> None:
245
+ definition = SimpleSyntaxDefinition(
246
+ start_delimiter="[space]",
247
+ regex_pattern=re.compile(r"^\[space\]\s*$"),
248
+ )
249
+ self._definitions[SyntaxDefinitionRegistryKey.SPACE] = definition
250
+
251
+ def _register_heading_syntax(self) -> None:
252
+ definition = SimpleSyntaxDefinition(
253
+ start_delimiter="#",
254
+ regex_pattern=re.compile(r"^(#{1,3})[ \t]+(.+)$"),
255
+ )
256
+ self._definitions[SyntaxDefinitionRegistryKey.HEADING] = definition
257
+
258
+ # Registration methods - EnclosedSyntaxDefinition
259
+ def _register_audio_syntax(self) -> None:
260
+ self._definitions[SyntaxDefinitionRegistryKey.AUDIO] = (
261
+ self._create_media_syntax("audio")
262
+ )
263
+
264
+ def _register_video_syntax(self) -> None:
265
+ self._definitions[SyntaxDefinitionRegistryKey.VIDEO] = (
266
+ self._create_media_syntax("video")
267
+ )
268
+
269
+ def _register_image_syntax(self) -> None:
270
+ self._definitions[SyntaxDefinitionRegistryKey.IMAGE] = (
271
+ self._create_media_syntax("image")
272
+ )
273
+
274
+ def _register_file_syntax(self) -> None:
275
+ self._definitions[SyntaxDefinitionRegistryKey.FILE] = self._create_media_syntax(
276
+ "file"
277
+ )
278
+
279
+ def _register_pdf_syntax(self) -> None:
280
+ self._definitions[SyntaxDefinitionRegistryKey.PDF] = self._create_media_syntax(
281
+ "pdf"
282
+ )
283
+
284
+ def _register_bookmark_syntax(self) -> None:
285
+ self._definitions[SyntaxDefinitionRegistryKey.BOOKMARK] = (
286
+ self._create_url_media_syntax("bookmark")
287
+ )
288
+
289
+ def _register_embed_syntax(self) -> None:
290
+ self._definitions[SyntaxDefinitionRegistryKey.EMBED] = (
291
+ self._create_url_media_syntax("embed")
292
+ )
293
+
294
+ def _register_callout_syntax(self) -> None:
295
+ definition = EnclosedSyntaxDefinition(
296
+ start_delimiter="[callout]",
297
+ end_delimiter=")",
298
+ regex_pattern=re.compile(
299
+ r'\[callout\](?:\(([^")]+?)(?:\s+"([^"]+)")?\)|(?:\s+([^"\n]+?)(?:\s+"([^"]+)")?)(?:\n|$))'
300
+ ),
301
+ end_regex_pattern=re.compile(r"\)"),
302
+ )
303
+ self._definitions[SyntaxDefinitionRegistryKey.CALLOUT] = definition
304
+
305
+ def _register_code_syntax(self) -> None:
306
+ code_delimiter = "```"
307
+ definition = EnclosedSyntaxDefinition(
308
+ start_delimiter=code_delimiter,
309
+ end_delimiter=code_delimiter,
310
+ regex_pattern=re.compile("^" + re.escape(code_delimiter) + r"(\w*)\s*$"),
311
+ end_regex_pattern=re.compile("^" + re.escape(code_delimiter) + r"\s*$"),
312
+ )
313
+ self._definitions[SyntaxDefinitionRegistryKey.CODE] = definition
314
+
315
+ def _register_column_syntax(self) -> None:
316
+ delimiter = self._markdown_grammar.column_delimiter
317
+ definition = EnclosedSyntaxDefinition(
318
+ start_delimiter=f"{delimiter} column",
319
+ end_delimiter=delimiter,
320
+ regex_pattern=re.compile(
321
+ rf"^{re.escape(delimiter)}\s*column(?:\s+(0?\.\d+|1(?:\.0?)?))??\s*$",
322
+ re.IGNORECASE | re.MULTILINE,
323
+ ),
324
+ end_regex_pattern=re.compile(rf"^{re.escape(delimiter)}\s*$", re.MULTILINE),
325
+ )
326
+ self._definitions[SyntaxDefinitionRegistryKey.COLUMN] = definition
327
+
328
+ def _register_column_list_syntax(self) -> None:
329
+ delimiter = self._markdown_grammar.column_delimiter
330
+ definition = EnclosedSyntaxDefinition(
331
+ start_delimiter=f"{delimiter} columns",
332
+ end_delimiter=delimiter,
333
+ regex_pattern=re.compile(
334
+ rf"^{re.escape(delimiter)}\s*columns?\s*$", re.IGNORECASE
335
+ ),
336
+ end_regex_pattern=re.compile(rf"^{re.escape(delimiter)}\s*$"),
337
+ )
338
+ self._definitions[SyntaxDefinitionRegistryKey.COLUMN_LIST] = definition
339
+
340
+ def _register_equation_syntax(self) -> None:
341
+ definition = EnclosedSyntaxDefinition(
342
+ start_delimiter="$$",
343
+ end_delimiter="$$",
344
+ regex_pattern=re.compile(r"^\$\$\s*$"),
345
+ end_regex_pattern=re.compile(r"^\$\$\s*$"),
346
+ )
347
+ self._definitions[SyntaxDefinitionRegistryKey.EQUATION] = definition
348
+
349
+ def _register_toggle_syntax(self) -> None:
350
+ delimiter = self._markdown_grammar.toggle_delimiter
351
+ definition = EnclosedSyntaxDefinition(
352
+ start_delimiter=delimiter,
353
+ end_delimiter=delimiter,
354
+ regex_pattern=re.compile(rf"^{re.escape(delimiter)}\s+(.+)$"),
355
+ end_regex_pattern=re.compile(rf"^{re.escape(delimiter)}\s*$"),
356
+ )
357
+ self._definitions[SyntaxDefinitionRegistryKey.TOGGLE] = definition
358
+
359
+ def _register_toggleable_heading_syntax(self) -> None:
360
+ delimiter = self._markdown_grammar.toggle_delimiter
361
+ escaped_delimiter = re.escape(delimiter)
362
+ definition = EnclosedSyntaxDefinition(
363
+ start_delimiter=f"{delimiter} #",
364
+ end_delimiter=delimiter,
365
+ regex_pattern=re.compile(
366
+ rf"^{escaped_delimiter}\s*(?P<level>#{{1,3}})(?!#)\s*(.+)$",
367
+ re.IGNORECASE,
368
+ ),
369
+ end_regex_pattern=re.compile(rf"^{escaped_delimiter}\s*$"),
370
+ )
371
+ self._definitions[SyntaxDefinitionRegistryKey.TOGGLEABLE_HEADING] = definition
@@ -0,0 +1,4 @@
1
+ from .models import SyntaxPromptData
2
+ from .registry import SyntaxPromptRegistry
3
+
4
+ __all__ = ["SyntaxPromptData", "SyntaxPromptRegistry"]
@@ -0,0 +1,11 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass(frozen=True)
class SyntaxPromptData:
    """Immutable record of prompt metadata for one syntax element.

    Instances are frozen: attributes cannot be reassigned after
    construction. Note that ``few_shot_examples`` is a ``list``, so the
    list contents themselves remain mutable and instances are not
    hashable.

    NOTE(review): the field meanings are only implied by their names;
    confirm them against the code that builds these records.
    """

    element: str
    description: str
    is_multi_line: bool
    few_shot_examples: list[str]
    usage_notes: str
    supports_inline_rich_text: bool