notionary 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +44 -1
- notionary/blocks/client.py +37 -11
- notionary/blocks/rich_text/markdown_rich_text_converter.py +49 -15
- notionary/blocks/rich_text/models.py +13 -4
- notionary/blocks/rich_text/name_id_resolver/data_source.py +9 -3
- notionary/blocks/rich_text/name_id_resolver/person.py +6 -2
- notionary/blocks/rich_text/rich_text_markdown_converter.py +10 -3
- notionary/blocks/schemas.py +2 -1
- notionary/comments/client.py +19 -6
- notionary/comments/factory.py +10 -3
- notionary/comments/schemas.py +9 -3
- notionary/comments/service.py +12 -4
- notionary/data_source/http/data_source_instance_client.py +59 -17
- notionary/data_source/properties/schemas.py +30 -10
- notionary/data_source/query/builder.py +67 -18
- notionary/data_source/query/resolver.py +16 -5
- notionary/data_source/query/schema.py +24 -6
- notionary/data_source/query/validator.py +18 -6
- notionary/data_source/schema/registry.py +31 -12
- notionary/data_source/schema/service.py +66 -20
- notionary/data_source/service.py +74 -23
- notionary/database/client.py +27 -9
- notionary/database/database_metadata_update_client.py +12 -4
- notionary/database/service.py +11 -4
- notionary/exceptions/__init__.py +15 -3
- notionary/exceptions/block_parsing.py +6 -2
- notionary/exceptions/data_source/builder.py +11 -5
- notionary/exceptions/data_source/properties.py +3 -1
- notionary/exceptions/file_upload.py +12 -3
- notionary/exceptions/properties.py +3 -1
- notionary/exceptions/search.py +6 -2
- notionary/file_upload/client.py +5 -1
- notionary/file_upload/config/config.py +10 -3
- notionary/file_upload/query/builder.py +6 -2
- notionary/file_upload/schemas.py +3 -1
- notionary/file_upload/service.py +42 -14
- notionary/file_upload/validation/factory.py +3 -1
- notionary/file_upload/validation/impl/file_name_length.py +3 -1
- notionary/file_upload/validation/models.py +15 -5
- notionary/file_upload/validation/validators/file_extension.py +12 -3
- notionary/http/client.py +27 -8
- notionary/page/content/__init__.py +9 -0
- notionary/page/content/factory.py +21 -7
- notionary/page/content/markdown/builder.py +85 -23
- notionary/page/content/markdown/nodes/audio.py +8 -4
- notionary/page/content/markdown/nodes/base.py +3 -3
- notionary/page/content/markdown/nodes/bookmark.py +5 -3
- notionary/page/content/markdown/nodes/breadcrumb.py +2 -2
- notionary/page/content/markdown/nodes/bulleted_list.py +5 -3
- notionary/page/content/markdown/nodes/callout.py +2 -2
- notionary/page/content/markdown/nodes/code.py +5 -3
- notionary/page/content/markdown/nodes/columns.py +3 -3
- notionary/page/content/markdown/nodes/container.py +9 -5
- notionary/page/content/markdown/nodes/divider.py +2 -2
- notionary/page/content/markdown/nodes/embed.py +8 -4
- notionary/page/content/markdown/nodes/equation.py +4 -2
- notionary/page/content/markdown/nodes/file.py +8 -4
- notionary/page/content/markdown/nodes/heading.py +2 -2
- notionary/page/content/markdown/nodes/image.py +8 -4
- notionary/page/content/markdown/nodes/mixins/caption.py +5 -3
- notionary/page/content/markdown/nodes/numbered_list.py +5 -3
- notionary/page/content/markdown/nodes/paragraph.py +4 -2
- notionary/page/content/markdown/nodes/pdf.py +8 -4
- notionary/page/content/markdown/nodes/quote.py +2 -2
- notionary/page/content/markdown/nodes/space.py +2 -2
- notionary/page/content/markdown/nodes/table.py +8 -5
- notionary/page/content/markdown/nodes/table_of_contents.py +2 -2
- notionary/page/content/markdown/nodes/todo.py +15 -7
- notionary/page/content/markdown/nodes/toggle.py +2 -2
- notionary/page/content/markdown/nodes/video.py +8 -4
- notionary/page/content/markdown/structured_output/__init__.py +73 -0
- notionary/page/content/markdown/structured_output/models.py +391 -0
- notionary/page/content/markdown/structured_output/service.py +211 -0
- notionary/page/content/parser/context.py +1 -1
- notionary/page/content/parser/factory.py +23 -8
- notionary/page/content/parser/parsers/audio.py +7 -2
- notionary/page/content/parser/parsers/base.py +2 -2
- notionary/page/content/parser/parsers/bookmark.py +2 -2
- notionary/page/content/parser/parsers/breadcrumb.py +2 -2
- notionary/page/content/parser/parsers/bulleted_list.py +19 -6
- notionary/page/content/parser/parsers/callout.py +15 -5
- notionary/page/content/parser/parsers/caption.py +9 -3
- notionary/page/content/parser/parsers/code.py +21 -7
- notionary/page/content/parser/parsers/column.py +8 -4
- notionary/page/content/parser/parsers/column_list.py +19 -7
- notionary/page/content/parser/parsers/divider.py +2 -2
- notionary/page/content/parser/parsers/embed.py +2 -2
- notionary/page/content/parser/parsers/equation.py +8 -4
- notionary/page/content/parser/parsers/file.py +7 -2
- notionary/page/content/parser/parsers/file_like_block.py +30 -10
- notionary/page/content/parser/parsers/heading.py +31 -10
- notionary/page/content/parser/parsers/image.py +7 -2
- notionary/page/content/parser/parsers/numbered_list.py +18 -6
- notionary/page/content/parser/parsers/paragraph.py +3 -1
- notionary/page/content/parser/parsers/pdf.py +7 -2
- notionary/page/content/parser/parsers/quote.py +28 -9
- notionary/page/content/parser/parsers/space.py +2 -2
- notionary/page/content/parser/parsers/table.py +31 -10
- notionary/page/content/parser/parsers/table_of_contents.py +7 -3
- notionary/page/content/parser/parsers/todo.py +15 -5
- notionary/page/content/parser/parsers/toggle.py +15 -5
- notionary/page/content/parser/parsers/video.py +7 -2
- notionary/page/content/parser/post_processing/handlers/rich_text_length.py +8 -2
- notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +8 -2
- notionary/page/content/parser/post_processing/service.py +3 -1
- notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +21 -7
- notionary/page/content/parser/pre_processsing/handlers/indentation.py +11 -4
- notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +13 -6
- notionary/page/content/parser/service.py +4 -1
- notionary/page/content/renderer/context.py +15 -5
- notionary/page/content/renderer/factory.py +12 -6
- notionary/page/content/renderer/post_processing/handlers/numbered_list.py +19 -9
- notionary/page/content/renderer/renderers/audio.py +14 -5
- notionary/page/content/renderer/renderers/base.py +3 -3
- notionary/page/content/renderer/renderers/bookmark.py +3 -1
- notionary/page/content/renderer/renderers/bulleted_list.py +11 -5
- notionary/page/content/renderer/renderers/callout.py +19 -7
- notionary/page/content/renderer/renderers/captioned_block.py +11 -5
- notionary/page/content/renderer/renderers/code.py +6 -2
- notionary/page/content/renderer/renderers/column.py +3 -1
- notionary/page/content/renderer/renderers/column_list.py +3 -1
- notionary/page/content/renderer/renderers/embed.py +3 -1
- notionary/page/content/renderer/renderers/equation.py +3 -1
- notionary/page/content/renderer/renderers/file.py +14 -5
- notionary/page/content/renderer/renderers/file_like_block.py +8 -4
- notionary/page/content/renderer/renderers/heading.py +22 -8
- notionary/page/content/renderer/renderers/image.py +13 -4
- notionary/page/content/renderer/renderers/numbered_list.py +8 -3
- notionary/page/content/renderer/renderers/paragraph.py +12 -4
- notionary/page/content/renderer/renderers/pdf.py +14 -5
- notionary/page/content/renderer/renderers/quote.py +14 -6
- notionary/page/content/renderer/renderers/table.py +15 -5
- notionary/page/content/renderer/renderers/todo.py +16 -6
- notionary/page/content/renderer/renderers/toggle.py +8 -4
- notionary/page/content/renderer/renderers/video.py +14 -5
- notionary/page/content/renderer/service.py +9 -3
- notionary/page/content/service.py +21 -7
- notionary/page/content/syntax/definition/__init__.py +11 -0
- notionary/page/content/syntax/definition/models.py +57 -0
- notionary/page/content/syntax/definition/registry.py +371 -0
- notionary/page/content/syntax/prompts/__init__.py +4 -0
- notionary/page/content/syntax/prompts/models.py +11 -0
- notionary/page/content/syntax/prompts/registry.py +703 -0
- notionary/page/page_metadata_update_client.py +12 -4
- notionary/page/properties/client.py +45 -15
- notionary/page/properties/factory.py +6 -2
- notionary/page/properties/service.py +110 -36
- notionary/page/service.py +20 -6
- notionary/shared/entity/client.py +6 -2
- notionary/shared/entity/dto_parsers.py +3 -1
- notionary/shared/entity/entity_metadata_update_client.py +9 -3
- notionary/shared/entity/service.py +53 -22
- notionary/shared/models/file.py +3 -1
- notionary/user/base.py +6 -2
- notionary/user/bot.py +10 -2
- notionary/user/client.py +3 -1
- notionary/user/person.py +3 -1
- notionary/user/schemas.py +3 -1
- notionary/user/service.py +6 -2
- notionary/utils/decorators.py +6 -2
- notionary/utils/fuzzy.py +6 -2
- notionary/utils/mixins/logging.py +3 -1
- notionary/utils/pagination.py +14 -4
- notionary/workspace/__init__.py +5 -1
- notionary/workspace/query/service.py +59 -16
- notionary/workspace/service.py +39 -11
- {notionary-0.4.0.dist-info → notionary-0.4.1.dist-info}/METADATA +1 -1
- notionary-0.4.1.dist-info/RECORD +236 -0
- notionary/page/blocks/client.py +0 -1
- notionary/page/content/syntax/__init__.py +0 -5
- notionary/page/content/syntax/models.py +0 -66
- notionary/page/content/syntax/registry.py +0 -371
- notionary-0.4.0.dist-info/RECORD +0 -230
- /notionary/page/content/syntax/{grammar.py → definition/grammar.py} +0 -0
- {notionary-0.4.0.dist-info → notionary-0.4.1.dist-info}/WHEEL +0 -0
- {notionary-0.4.0.dist-info → notionary-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from notionary.page.content.markdown.builder import MarkdownBuilder
|
|
4
|
+
from notionary.page.content.markdown.structured_output.models import (
|
|
5
|
+
AudioSchema,
|
|
6
|
+
BookmarkSchema,
|
|
7
|
+
BreadcrumbSchema,
|
|
8
|
+
BulletedListItemSchema,
|
|
9
|
+
BulletedListSchema,
|
|
10
|
+
CalloutSchema,
|
|
11
|
+
CodeSchema,
|
|
12
|
+
ColumnsSchema,
|
|
13
|
+
EmbedSchema,
|
|
14
|
+
EquationSchema,
|
|
15
|
+
FileSchema,
|
|
16
|
+
Heading1Schema,
|
|
17
|
+
Heading2Schema,
|
|
18
|
+
Heading3Schema,
|
|
19
|
+
ImageSchema,
|
|
20
|
+
MarkdownDocumentSchema,
|
|
21
|
+
MarkdownNodeSchema,
|
|
22
|
+
MermaidSchema,
|
|
23
|
+
NumberedListItemSchema,
|
|
24
|
+
NumberedListSchema,
|
|
25
|
+
ParagraphSchema,
|
|
26
|
+
PdfSchema,
|
|
27
|
+
QuoteSchema,
|
|
28
|
+
TableOfContentsSchema,
|
|
29
|
+
TableSchema,
|
|
30
|
+
TodoListSchema,
|
|
31
|
+
TodoSchema,
|
|
32
|
+
ToggleSchema,
|
|
33
|
+
VideoSchema,
|
|
34
|
+
)
|
|
35
|
+
from notionary.utils.decorators import time_execution_sync
|
|
36
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class StructuredOutputMarkdownConverter(LoggingMixin):
    """Turn a structured-output document schema into markdown.

    Every schema node is asked to dispatch itself through
    ``node.process_with(self)``; the ``_process_*`` methods below then forward
    the node's fields to the matching ``MarkdownBuilder`` call.
    """

    def __init__(self, builder: MarkdownBuilder | None = None) -> None:
        """Store the builder to write into, creating one when none is given.

        Args:
            builder: Builder that receives the converted nodes. A new
                ``MarkdownBuilder`` is created only when this is ``None``.
        """
        # Compare with None explicitly: a caller-supplied builder must be kept
        # even if the object happens to evaluate as falsy.
        self.builder = builder if builder is not None else MarkdownBuilder()

    @time_execution_sync()
    def convert(self, schema: MarkdownDocumentSchema) -> str:
        """Process every node of ``schema`` in order and return the built result.

        NOTE(review): ``self.builder`` is not reset here, so whether repeated
        calls on one instance accumulate output depends on
        ``MarkdownBuilder.build()`` -- confirm.
        """
        for node in schema.nodes:
            self._process_node(node)
        return self.builder.build()

    def _process_node(self, node: MarkdownNodeSchema) -> None:
        """Let ``node`` dispatch itself to this converter."""
        node.process_with(self)

    def _process_heading_1(self, node: Heading1Schema) -> None:
        """Emit a level-1 heading, with nested children when present."""
        self.builder.h1(node.text, self._create_children_builder(node.children))

    def _process_heading_2(self, node: Heading2Schema) -> None:
        """Emit a level-2 heading, with nested children when present."""
        self.builder.h2(node.text, self._create_children_builder(node.children))

    def _process_heading_3(self, node: Heading3Schema) -> None:
        """Emit a level-3 heading, with nested children when present."""
        self.builder.h3(node.text, self._create_children_builder(node.children))

    def _process_paragraph(self, node: ParagraphSchema) -> None:
        """Emit a paragraph."""
        self.builder.paragraph(node.text)

    # Unlike the other handlers, the space and divider handlers take no node.
    def _process_space(self) -> None:
        """Emit a space."""
        self.builder.space()

    def _process_divider(self) -> None:
        """Emit a divider."""
        self.builder.divider()

    def _process_quote(self, node: QuoteSchema) -> None:
        """Emit a quote, with nested children when present."""
        self.builder.quote(node.text, self._create_children_builder(node.children))

    def _process_bulleted_list(self, node: BulletedListSchema) -> None:
        """Emit a bulleted list.

        If any item has children, every item is emitted individually so the
        children can be attached; otherwise the texts go out in one list call.
        """
        if any(item.children for item in node.items):
            for item in node.items:
                self._process_bulleted_list_item(item)
            return

        self.builder.bulleted_list([item.text for item in node.items])

    def _process_bulleted_list_item(self, node: BulletedListItemSchema) -> None:
        """Emit one bulleted list item, with nested children when present."""
        self.builder.bulleted_list_item(
            node.text, self._create_children_builder(node.children)
        )

    def _process_numbered_list(self, node: NumberedListSchema) -> None:
        """Emit a numbered list.

        If any item has children, every item is emitted individually so the
        children can be attached; otherwise the texts go out in one list call.
        """
        if any(item.children for item in node.items):
            for item in node.items:
                self._process_numbered_list_item(item)
            return

        self.builder.numbered_list([item.text for item in node.items])

    def _process_numbered_list_item(self, node: NumberedListItemSchema) -> None:
        """Emit one numbered list item, with nested children when present."""
        self.builder.numbered_list_item(
            node.text, self._create_children_builder(node.children)
        )

    def _process_todo(self, node: TodoSchema) -> None:
        """Emit one to-do entry, with nested children when present."""
        self.builder.todo(
            node.text,
            checked=node.checked,
            builder_func=self._create_children_builder(node.children),
        )

    def _process_todo_list(self, node: TodoListSchema) -> None:
        """Emit a to-do list.

        If any item has children, every item is emitted individually through
        ``_process_todo``; otherwise texts and checked flags go out in one call.
        """
        if any(item.children for item in node.items):
            for todo_item in node.items:
                self._process_todo(todo_item)
            return

        texts = [item.text for item in node.items]
        completed = [item.checked for item in node.items]
        self.builder.todo_list(texts, completed)

    def _process_callout(self, node: CalloutSchema) -> None:
        """Emit a callout, using the children-aware builder call when needed."""
        if node.children:
            builder_func = self._create_children_builder(node.children)
            self.builder.callout_with_children(node.text, node.emoji, builder_func)
        else:
            self.builder.callout(node.text, node.emoji)

    def _process_toggle(self, node: ToggleSchema) -> None:
        """Emit a toggle whose body is built from the node's children.

        The children builder is ``None`` when ``node.children`` is empty.
        """
        self.builder.toggle(
            node.title, self._create_children_builder(node.children)
        )

    def _process_image(self, node: ImageSchema) -> None:
        """Emit an image from its URL and caption."""
        self.builder.image(node.url, node.caption)

    def _process_video(self, node: VideoSchema) -> None:
        """Emit a video from its URL and caption."""
        self.builder.video(node.url, node.caption)

    def _process_audio(self, node: AudioSchema) -> None:
        """Emit an audio block from its URL and caption."""
        self.builder.audio(node.url, node.caption)

    def _process_file(self, node: FileSchema) -> None:
        """Emit a file block from its URL and caption."""
        self.builder.file(node.url, node.caption)

    def _process_pdf(self, node: PdfSchema) -> None:
        """Emit a PDF block from its URL and caption."""
        self.builder.pdf(node.url, node.caption)

    def _process_bookmark(self, node: BookmarkSchema) -> None:
        """Emit a bookmark from its URL, title and caption."""
        self.builder.bookmark(node.url, node.title, node.caption)

    def _process_embed(self, node: EmbedSchema) -> None:
        """Emit an embed from its URL and caption."""
        self.builder.embed(node.url, node.caption)

    def _process_code(self, node: CodeSchema) -> None:
        """Emit a code block from its code, language and caption."""
        self.builder.code(node.code, node.language, node.caption)

    def _process_mermaid(self, node: MermaidSchema) -> None:
        """Emit a mermaid diagram from its diagram source and caption."""
        self.builder.mermaid(node.diagram, node.caption)

    def _process_table(self, node: TableSchema) -> None:
        """Emit a table from its headers and rows."""
        self.builder.table(node.headers, node.rows)

    def _process_breadcrumb(self, node: BreadcrumbSchema) -> None:
        """Emit a breadcrumb; no field of ``node`` is read."""
        self.builder.breadcrumb()

    def _process_equation(self, node: EquationSchema) -> None:
        """Emit an equation from its expression."""
        self.builder.equation(node.expression)

    def _process_table_of_contents(self, node: TableOfContentsSchema) -> None:
        """Emit a table of contents; no field of ``node`` is read."""
        self.builder.table_of_contents()

    def _process_columns(self, node: ColumnsSchema) -> None:
        """Emit a column layout with one children builder per column.

        ``width_ratios`` is forwarded only when at least one column sets a
        ratio; otherwise the builder is called without that keyword.
        """
        builder_funcs = [
            self._create_children_builder(column.children)
            for column in node.columns
        ]
        width_ratios = [column.width_ratio for column in node.columns]

        if any(ratio is not None for ratio in width_ratios):
            self.builder.columns(*builder_funcs, width_ratios=width_ratios)
        else:
            self.builder.columns(*builder_funcs)

    def _create_children_builder(self, children: list[MarkdownNodeSchema] | None):
        """Return a callback that writes ``children`` into a given builder.

        Args:
            children: Child nodes to convert, or ``None``.

        Returns:
            ``None`` when ``children`` is empty or ``None``; otherwise a
            function that takes a ``MarkdownBuilder``, processes every child
            into it with a nested converter, and returns that same builder.
        """
        if not children:
            return None

        def builder_func(builder: MarkdownBuilder) -> MarkdownBuilder:
            # Bind the nested converter directly to the builder we were handed
            # instead of allocating a default builder and then replacing it.
            converter = StructuredOutputMarkdownConverter(builder)
            for child in children:
                converter._process_node(child)
            return builder

        return builder_func
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from collections.abc import Awaitable, Callable
|
|
4
4
|
|
|
5
5
|
from notionary.blocks.schemas import BlockCreatePayload
|
|
6
|
-
from notionary.page.content.syntax.grammar import MarkdownGrammar
|
|
6
|
+
from notionary.page.content.syntax.definition.grammar import MarkdownGrammar
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ParentBlockContext:
|
|
@@ -30,18 +30,18 @@ from notionary.page.content.parser.parsers import (
|
|
|
30
30
|
ToggleParser,
|
|
31
31
|
VideoParser,
|
|
32
32
|
)
|
|
33
|
-
from notionary.page.content.syntax import
|
|
33
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class ConverterChainFactory:
|
|
37
37
|
def __init__(
|
|
38
38
|
self,
|
|
39
39
|
rich_text_converter: MarkdownRichTextConverter | None = None,
|
|
40
|
-
syntax_registry:
|
|
40
|
+
syntax_registry: SyntaxDefinitionRegistry | None = None,
|
|
41
41
|
file_upload_service: NotionFileUpload | None = None,
|
|
42
42
|
) -> None:
|
|
43
43
|
self._rich_text_converter = rich_text_converter or MarkdownRichTextConverter()
|
|
44
|
-
self._syntax_registry = syntax_registry or
|
|
44
|
+
self._syntax_registry = syntax_registry or SyntaxDefinitionRegistry()
|
|
45
45
|
self._file_upload_service = file_upload_service
|
|
46
46
|
|
|
47
47
|
def create(self) -> LineParser:
|
|
@@ -189,19 +189,34 @@ class ConverterChainFactory:
|
|
|
189
189
|
return EmbedParser(syntax_registry=self._syntax_registry)
|
|
190
190
|
|
|
191
191
|
def _create_image_parser(self) -> ImageParser:
|
|
192
|
-
return ImageParser(
|
|
192
|
+
return ImageParser(
|
|
193
|
+
syntax_registry=self._syntax_registry,
|
|
194
|
+
file_upload_service=self._file_upload_service,
|
|
195
|
+
)
|
|
193
196
|
|
|
194
197
|
def _create_video_parser(self) -> VideoParser:
|
|
195
|
-
return VideoParser(
|
|
198
|
+
return VideoParser(
|
|
199
|
+
syntax_registry=self._syntax_registry,
|
|
200
|
+
file_upload_service=self._file_upload_service,
|
|
201
|
+
)
|
|
196
202
|
|
|
197
203
|
def _create_audio_parser(self) -> AudioParser:
|
|
198
|
-
return AudioParser(
|
|
204
|
+
return AudioParser(
|
|
205
|
+
syntax_registry=self._syntax_registry,
|
|
206
|
+
file_upload_service=self._file_upload_service,
|
|
207
|
+
)
|
|
199
208
|
|
|
200
209
|
def _create_file_parser(self) -> FileParser:
|
|
201
|
-
return FileParser(
|
|
210
|
+
return FileParser(
|
|
211
|
+
syntax_registry=self._syntax_registry,
|
|
212
|
+
file_upload_service=self._file_upload_service,
|
|
213
|
+
)
|
|
202
214
|
|
|
203
215
|
def _create_pdf_parser(self) -> PdfParser:
|
|
204
|
-
return PdfParser(
|
|
216
|
+
return PdfParser(
|
|
217
|
+
syntax_registry=self._syntax_registry,
|
|
218
|
+
file_upload_service=self._file_upload_service,
|
|
219
|
+
)
|
|
205
220
|
|
|
206
221
|
def _create_caption_parser(self) -> CaptionParser:
|
|
207
222
|
return CaptionParser(
|
|
@@ -2,12 +2,17 @@ from typing import override
|
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.schemas import CreateAudioBlock, ExternalFileWithCaption
|
|
4
4
|
from notionary.page.content.parser.parsers.file_like_block import FileLikeBlockParser
|
|
5
|
-
from notionary.page.content.syntax import
|
|
5
|
+
from notionary.page.content.syntax.definition import (
|
|
6
|
+
SyntaxDefinition,
|
|
7
|
+
SyntaxDefinitionRegistry,
|
|
8
|
+
)
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
class AudioParser(FileLikeBlockParser[CreateAudioBlock]):
|
|
9
12
|
@override
|
|
10
|
-
def _get_syntax(
|
|
13
|
+
def _get_syntax(
|
|
14
|
+
self, syntax_registry: SyntaxDefinitionRegistry
|
|
15
|
+
) -> SyntaxDefinition:
|
|
11
16
|
return syntax_registry.get_audio_syntax()
|
|
12
17
|
|
|
13
18
|
@override
|
|
@@ -3,11 +3,11 @@ from __future__ import annotations
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
|
|
5
5
|
from notionary.page.content.parser.context import BlockParsingContext
|
|
6
|
-
from notionary.page.content.syntax import
|
|
6
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class LineParser(ABC):
|
|
10
|
-
def __init__(self, syntax_registry:
|
|
10
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry | None = None) -> None:
|
|
11
11
|
self._next_handler: LineParser | None = None
|
|
12
12
|
self._syntax_registry = syntax_registry
|
|
13
13
|
|
|
@@ -4,11 +4,11 @@ from typing import override
|
|
|
4
4
|
|
|
5
5
|
from notionary.blocks.schemas import BookmarkData, CreateBookmarkBlock
|
|
6
6
|
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
7
|
-
from notionary.page.content.syntax import
|
|
7
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class BookmarkParser(LineParser):
|
|
11
|
-
def __init__(self, syntax_registry:
|
|
11
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry) -> None:
|
|
12
12
|
super().__init__(syntax_registry)
|
|
13
13
|
self._syntax = syntax_registry.get_bookmark_syntax()
|
|
14
14
|
|
|
@@ -5,11 +5,11 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
5
5
|
BlockParsingContext,
|
|
6
6
|
LineParser,
|
|
7
7
|
)
|
|
8
|
-
from notionary.page.content.syntax import
|
|
8
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class BreadcrumbParser(LineParser):
|
|
12
|
-
def __init__(self, syntax_registry:
|
|
12
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry) -> None:
|
|
13
13
|
super().__init__(syntax_registry)
|
|
14
14
|
self._syntax = syntax_registry.get_breadcrumb_syntax()
|
|
15
15
|
|
|
@@ -3,16 +3,23 @@ from typing import override
|
|
|
3
3
|
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
4
4
|
MarkdownRichTextConverter,
|
|
5
5
|
)
|
|
6
|
-
from notionary.blocks.schemas import
|
|
6
|
+
from notionary.blocks.schemas import (
|
|
7
|
+
CreateBulletedListItemBlock,
|
|
8
|
+
CreateBulletedListItemData,
|
|
9
|
+
)
|
|
7
10
|
from notionary.page.content.parser.parsers.base import (
|
|
8
11
|
BlockParsingContext,
|
|
9
12
|
LineParser,
|
|
10
13
|
)
|
|
11
|
-
from notionary.page.content.syntax import
|
|
14
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
class BulletedListParser(LineParser):
|
|
15
|
-
def __init__(
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
21
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
22
|
+
) -> None:
|
|
16
23
|
super().__init__(syntax_registry)
|
|
17
24
|
self._syntax = syntax_registry.get_bulleted_list_syntax()
|
|
18
25
|
self._rich_text_converter = rich_text_converter
|
|
@@ -35,7 +42,9 @@ class BulletedListParser(LineParser):
|
|
|
35
42
|
await self._process_nested_children(block, context)
|
|
36
43
|
context.result_blocks.append(block)
|
|
37
44
|
|
|
38
|
-
async def _process_nested_children(
|
|
45
|
+
async def _process_nested_children(
|
|
46
|
+
self, block: CreateBulletedListItemBlock, context: BlockParsingContext
|
|
47
|
+
) -> None:
|
|
39
48
|
child_lines = self._collect_child_lines(context)
|
|
40
49
|
if not child_lines:
|
|
41
50
|
return
|
|
@@ -57,13 +66,17 @@ class BulletedListParser(LineParser):
|
|
|
57
66
|
children_text = self._convert_lines_to_text(stripped_lines)
|
|
58
67
|
return await context.parse_nested_markdown(children_text)
|
|
59
68
|
|
|
60
|
-
def _remove_parent_indentation(
|
|
69
|
+
def _remove_parent_indentation(
|
|
70
|
+
self, lines: list[str], context: BlockParsingContext
|
|
71
|
+
) -> list[str]:
|
|
61
72
|
return context.strip_indentation_level(lines, levels=1)
|
|
62
73
|
|
|
63
74
|
def _convert_lines_to_text(self, lines: list[str]) -> str:
|
|
64
75
|
return "\n".join(lines)
|
|
65
76
|
|
|
66
|
-
async def _create_bulleted_list_block(
|
|
77
|
+
async def _create_bulleted_list_block(
|
|
78
|
+
self, text: str
|
|
79
|
+
) -> CreateBulletedListItemBlock | None:
|
|
67
80
|
content = self._extract_list_content(text)
|
|
68
81
|
if content is None:
|
|
69
82
|
return None
|
|
@@ -1,20 +1,26 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
|
-
from notionary.blocks.rich_text.markdown_rich_text_converter import
|
|
4
|
+
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
5
|
+
MarkdownRichTextConverter,
|
|
6
|
+
)
|
|
5
7
|
from notionary.blocks.schemas import CreateCalloutBlock, CreateCalloutData
|
|
6
8
|
from notionary.page.content.parser.parsers.base import (
|
|
7
9
|
BlockParsingContext,
|
|
8
10
|
LineParser,
|
|
9
11
|
)
|
|
10
|
-
from notionary.page.content.syntax import
|
|
12
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
11
13
|
from notionary.shared.models.icon import EmojiIcon
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class CalloutParser(LineParser):
|
|
15
17
|
DEFAULT_EMOJI = "💡"
|
|
16
18
|
|
|
17
|
-
def __init__(
|
|
19
|
+
def __init__(
|
|
20
|
+
self,
|
|
21
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
22
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
23
|
+
) -> None:
|
|
18
24
|
super().__init__(syntax_registry)
|
|
19
25
|
self._syntax = syntax_registry.get_callout_syntax()
|
|
20
26
|
self._pattern = self._syntax.regex_pattern
|
|
@@ -37,7 +43,9 @@ class CalloutParser(LineParser):
|
|
|
37
43
|
else:
|
|
38
44
|
context.result_blocks.append(block)
|
|
39
45
|
|
|
40
|
-
async def _process_nested_children(
|
|
46
|
+
async def _process_nested_children(
|
|
47
|
+
self, block: CreateCalloutBlock, context: BlockParsingContext
|
|
48
|
+
) -> None:
|
|
41
49
|
child_lines = self._collect_child_lines(context)
|
|
42
50
|
if not child_lines:
|
|
43
51
|
return
|
|
@@ -59,7 +67,9 @@ class CalloutParser(LineParser):
|
|
|
59
67
|
children_text = self._convert_lines_to_text(stripped_lines)
|
|
60
68
|
return await context.parse_nested_markdown(children_text)
|
|
61
69
|
|
|
62
|
-
def _remove_parent_indentation(
|
|
70
|
+
def _remove_parent_indentation(
|
|
71
|
+
self, lines: list[str], context: BlockParsingContext
|
|
72
|
+
) -> list[str]:
|
|
63
73
|
return context.strip_indentation_level(lines, levels=1)
|
|
64
74
|
|
|
65
75
|
def _convert_lines_to_text(self, lines: list[str]) -> str:
|
|
@@ -8,11 +8,15 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
8
8
|
BlockParsingContext,
|
|
9
9
|
LineParser,
|
|
10
10
|
)
|
|
11
|
-
from notionary.page.content.syntax import
|
|
11
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class CaptionParser(LineParser):
|
|
15
|
-
def __init__(
|
|
15
|
+
def __init__(
|
|
16
|
+
self,
|
|
17
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
18
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
19
|
+
) -> None:
|
|
16
20
|
super().__init__(syntax_registry)
|
|
17
21
|
self._syntax = syntax_registry.get_caption_syntax()
|
|
18
22
|
self._rich_text_converter = rich_text_converter
|
|
@@ -49,7 +53,9 @@ class CaptionParser(LineParser):
|
|
|
49
53
|
return False
|
|
50
54
|
return hasattr(block_data, "caption")
|
|
51
55
|
|
|
52
|
-
def _attach_caption_to_block(
|
|
56
|
+
def _attach_caption_to_block(
|
|
57
|
+
self, block: BlockCreatePayload, caption_rich_text: list
|
|
58
|
+
) -> None:
|
|
53
59
|
block_data = getattr(block, block.type.value)
|
|
54
60
|
if hasattr(block_data, "caption"):
|
|
55
61
|
block_data.caption = caption_rich_text
|
|
@@ -1,22 +1,30 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from typing import override
|
|
3
3
|
|
|
4
|
-
from notionary.blocks.rich_text.markdown_rich_text_converter import
|
|
4
|
+
from notionary.blocks.rich_text.markdown_rich_text_converter import (
|
|
5
|
+
MarkdownRichTextConverter,
|
|
6
|
+
)
|
|
5
7
|
from notionary.blocks.rich_text.models import RichText
|
|
6
8
|
from notionary.blocks.schemas import CodeData, CodingLanguage, CreateCodeBlock
|
|
7
9
|
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
8
|
-
from notionary.page.content.syntax import
|
|
10
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
class CodeParser(LineParser):
|
|
12
14
|
DEFAULT_LANGUAGE = CodingLanguage.PLAIN_TEXT
|
|
13
15
|
|
|
14
|
-
def __init__(
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
syntax_registry: SyntaxDefinitionRegistry,
|
|
19
|
+
rich_text_converter: MarkdownRichTextConverter,
|
|
20
|
+
) -> None:
|
|
15
21
|
super().__init__(syntax_registry)
|
|
16
22
|
self._syntax = syntax_registry.get_code_syntax()
|
|
17
23
|
self._rich_text_converter = rich_text_converter
|
|
18
24
|
self._code_start_pattern = self._syntax.regex_pattern
|
|
19
|
-
self._code_end_pattern = self._syntax.end_regex_pattern or re.compile(
|
|
25
|
+
self._code_end_pattern = self._syntax.end_regex_pattern or re.compile(
|
|
26
|
+
r"^```\s*$"
|
|
27
|
+
)
|
|
20
28
|
|
|
21
29
|
@override
|
|
22
30
|
def _can_handle(self, context: BlockParsingContext) -> bool:
|
|
@@ -29,7 +37,9 @@ class CodeParser(LineParser):
|
|
|
29
37
|
code_lines = self._collect_code_lines(context)
|
|
30
38
|
lines_consumed = self._count_lines_consumed(context)
|
|
31
39
|
|
|
32
|
-
block = await self._create_code_block(
|
|
40
|
+
block = await self._create_code_block(
|
|
41
|
+
opening_line=context.line, code_lines=code_lines
|
|
42
|
+
)
|
|
33
43
|
if not block:
|
|
34
44
|
return
|
|
35
45
|
|
|
@@ -56,7 +66,9 @@ class CodeParser(LineParser):
|
|
|
56
66
|
return line_index + 1
|
|
57
67
|
return len(context.get_remaining_lines())
|
|
58
68
|
|
|
59
|
-
async def _create_code_block(
|
|
69
|
+
async def _create_code_block(
|
|
70
|
+
self, opening_line: str, code_lines: list[str]
|
|
71
|
+
) -> CreateCodeBlock | None:
|
|
60
72
|
match = self._code_start_pattern.match(opening_line)
|
|
61
73
|
if not match:
|
|
62
74
|
return None
|
|
@@ -70,7 +82,9 @@ class CodeParser(LineParser):
|
|
|
70
82
|
def _parse_language(self, language_str: str | None) -> CodingLanguage:
|
|
71
83
|
return CodingLanguage.from_string(language_str, default=self.DEFAULT_LANGUAGE)
|
|
72
84
|
|
|
73
|
-
async def _create_rich_text_from_code(
|
|
85
|
+
async def _create_rich_text_from_code(
|
|
86
|
+
self, code_lines: list[str]
|
|
87
|
+
) -> list[RichText]:
|
|
74
88
|
content = "\n".join(code_lines) if code_lines else ""
|
|
75
89
|
return await self._rich_text_converter.to_rich_text(content)
|
|
76
90
|
|
|
@@ -5,14 +5,14 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
5
5
|
BlockParsingContext,
|
|
6
6
|
LineParser,
|
|
7
7
|
)
|
|
8
|
-
from notionary.page.content.syntax import
|
|
8
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class ColumnParser(LineParser):
|
|
12
12
|
MIN_WIDTH_RATIO = 0
|
|
13
13
|
MAX_WIDTH_RATIO = 1.0
|
|
14
14
|
|
|
15
|
-
def __init__(self, syntax_registry:
|
|
15
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry) -> None:
|
|
16
16
|
super().__init__(syntax_registry)
|
|
17
17
|
self._syntax = syntax_registry.get_column_syntax()
|
|
18
18
|
|
|
@@ -59,7 +59,9 @@ class ColumnParser(LineParser):
|
|
|
59
59
|
def _is_valid_width_ratio(self, width_ratio: float) -> bool:
|
|
60
60
|
return self.MIN_WIDTH_RATIO < width_ratio <= self.MAX_WIDTH_RATIO
|
|
61
61
|
|
|
62
|
-
async def _populate_children(
|
|
62
|
+
async def _populate_children(
|
|
63
|
+
self, block: CreateColumnBlock, context: BlockParsingContext
|
|
64
|
+
) -> None:
|
|
63
65
|
parent_indent_level = context.get_line_indentation_level()
|
|
64
66
|
child_lines = context.collect_indented_child_lines(parent_indent_level)
|
|
65
67
|
|
|
@@ -70,7 +72,9 @@ class ColumnParser(LineParser):
|
|
|
70
72
|
block.column.children = child_blocks
|
|
71
73
|
context.lines_consumed = len(child_lines)
|
|
72
74
|
|
|
73
|
-
async def _parse_indented_children(
|
|
75
|
+
async def _parse_indented_children(
|
|
76
|
+
self, child_lines: list[str], context: BlockParsingContext
|
|
77
|
+
) -> list:
|
|
74
78
|
stripped_lines = context.strip_indentation_level(child_lines, levels=1)
|
|
75
79
|
child_markdown = "\n".join(stripped_lines)
|
|
76
80
|
return await context.parse_nested_markdown(child_markdown)
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.enums import BlockType
|
|
4
|
-
from notionary.blocks.schemas import
|
|
4
|
+
from notionary.blocks.schemas import (
|
|
5
|
+
BlockCreatePayload,
|
|
6
|
+
CreateColumnListBlock,
|
|
7
|
+
CreateColumnListData,
|
|
8
|
+
)
|
|
5
9
|
from notionary.page.content.parser.parsers.base import (
|
|
6
10
|
BlockParsingContext,
|
|
7
11
|
LineParser,
|
|
8
12
|
)
|
|
9
|
-
from notionary.page.content.syntax import
|
|
13
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
10
14
|
|
|
11
15
|
|
|
12
16
|
class ColumnListParser(LineParser):
|
|
13
|
-
def __init__(self, syntax_registry:
|
|
17
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry) -> None:
|
|
14
18
|
super().__init__(syntax_registry)
|
|
15
19
|
self._syntax = syntax_registry.get_column_list_syntax()
|
|
16
20
|
|
|
@@ -35,9 +39,13 @@ class ColumnListParser(LineParser):
|
|
|
35
39
|
column_list_data = CreateColumnListData(children=[])
|
|
36
40
|
return CreateColumnListBlock(column_list=column_list_data)
|
|
37
41
|
|
|
38
|
-
async def _populate_columns(
|
|
42
|
+
async def _populate_columns(
|
|
43
|
+
self, block: CreateColumnListBlock, context: BlockParsingContext
|
|
44
|
+
) -> None:
|
|
39
45
|
parent_indent_level = context.get_line_indentation_level()
|
|
40
|
-
child_lines = self._collect_children_allowing_empty_lines(
|
|
46
|
+
child_lines = self._collect_children_allowing_empty_lines(
|
|
47
|
+
context, parent_indent_level
|
|
48
|
+
)
|
|
41
49
|
|
|
42
50
|
if not child_lines:
|
|
43
51
|
return
|
|
@@ -46,7 +54,9 @@ class ColumnListParser(LineParser):
|
|
|
46
54
|
block.column_list.children = column_blocks
|
|
47
55
|
context.lines_consumed = len(child_lines)
|
|
48
56
|
|
|
49
|
-
async def _parse_column_children(
|
|
57
|
+
async def _parse_column_children(
|
|
58
|
+
self, child_lines: list[str], context: BlockParsingContext
|
|
59
|
+
) -> list:
|
|
50
60
|
stripped_lines = context.strip_indentation_level(child_lines, levels=1)
|
|
51
61
|
child_markdown = "\n".join(stripped_lines)
|
|
52
62
|
parsed_blocks = await context.parse_nested_markdown(child_markdown)
|
|
@@ -67,7 +77,9 @@ class ColumnListParser(LineParser):
|
|
|
67
77
|
|
|
68
78
|
return child_lines
|
|
69
79
|
|
|
70
|
-
def _should_include_as_child(
|
|
80
|
+
def _should_include_as_child(
|
|
81
|
+
self, line: str, expected_indent: int, context: BlockParsingContext
|
|
82
|
+
) -> bool:
|
|
71
83
|
if not line.strip():
|
|
72
84
|
return True
|
|
73
85
|
|
|
@@ -5,11 +5,11 @@ from notionary.page.content.parser.parsers.base import (
|
|
|
5
5
|
BlockParsingContext,
|
|
6
6
|
LineParser,
|
|
7
7
|
)
|
|
8
|
-
from notionary.page.content.syntax import
|
|
8
|
+
from notionary.page.content.syntax.definition import SyntaxDefinitionRegistry
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class DividerParser(LineParser):
|
|
12
|
-
def __init__(self, syntax_registry:
|
|
12
|
+
def __init__(self, syntax_registry: SyntaxDefinitionRegistry) -> None:
|
|
13
13
|
super().__init__(syntax_registry)
|
|
14
14
|
self._syntax = syntax_registry.get_divider_syntax()
|
|
15
15
|
|