notionary 0.2.27__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +5 -20
- notionary/blocks/__init__.py +4 -4
- notionary/blocks/client.py +90 -216
- notionary/blocks/enums.py +167 -0
- notionary/blocks/rich_text/markdown_rich_text_converter.py +280 -0
- notionary/blocks/rich_text/models.py +178 -0
- notionary/blocks/rich_text/name_id_resolver/__init__.py +13 -0
- notionary/blocks/rich_text/name_id_resolver/data_source.py +32 -0
- notionary/blocks/rich_text/name_id_resolver/database.py +31 -0
- notionary/blocks/rich_text/name_id_resolver/page.py +34 -0
- notionary/blocks/rich_text/name_id_resolver/person.py +37 -0
- notionary/blocks/rich_text/name_id_resolver/port.py +11 -0
- notionary/blocks/rich_text/rich_text_markdown_converter.py +144 -0
- notionary/blocks/rich_text/rich_text_patterns.py +42 -0
- notionary/blocks/schemas.py +778 -0
- notionary/comments/__init__.py +1 -22
- notionary/comments/client.py +52 -187
- notionary/comments/factory.py +38 -0
- notionary/comments/models.py +5 -127
- notionary/comments/schemas.py +240 -0
- notionary/comments/service.py +34 -0
- notionary/data_source/http/client.py +11 -0
- notionary/data_source/http/data_source_instance_client.py +104 -0
- notionary/data_source/properties/schemas.py +402 -0
- notionary/data_source/query/builder.py +448 -0
- notionary/data_source/query/resolver.py +114 -0
- notionary/data_source/query/schema.py +302 -0
- notionary/data_source/query/validator.py +73 -0
- notionary/data_source/schema/registry.py +104 -0
- notionary/data_source/schema/service.py +136 -0
- notionary/data_source/schemas.py +27 -0
- notionary/data_source/service.py +377 -0
- notionary/database/client.py +30 -135
- notionary/database/database_metadata_update_client.py +19 -0
- notionary/database/schemas.py +29 -0
- notionary/database/service.py +168 -0
- notionary/exceptions/__init__.py +33 -0
- notionary/exceptions/api.py +41 -0
- notionary/exceptions/base.py +2 -0
- notionary/exceptions/block_parsing.py +16 -0
- notionary/exceptions/data_source/__init__.py +6 -0
- notionary/exceptions/data_source/builder.py +182 -0
- notionary/exceptions/data_source/properties.py +34 -0
- notionary/exceptions/properties.py +58 -0
- notionary/exceptions/search.py +57 -0
- notionary/file_upload/client.py +18 -30
- notionary/file_upload/models.py +7 -8
- notionary/file_upload/{notion_file_upload.py → service.py} +29 -64
- notionary/http/client.py +204 -0
- notionary/http/models.py +50 -0
- notionary/page/blocks/client.py +1 -0
- notionary/page/content/factory.py +73 -0
- notionary/page/content/markdown/__init__.py +5 -0
- notionary/page/content/markdown/builder.py +226 -0
- notionary/page/content/markdown/nodes/__init__.py +52 -0
- notionary/page/content/markdown/nodes/audio.py +23 -0
- notionary/page/content/markdown/nodes/base.py +12 -0
- notionary/page/content/markdown/nodes/bookmark.py +25 -0
- notionary/page/content/markdown/nodes/breadcrumb.py +14 -0
- notionary/page/content/markdown/nodes/bulleted_list.py +41 -0
- notionary/page/content/markdown/nodes/callout.py +34 -0
- notionary/page/content/markdown/nodes/code.py +28 -0
- notionary/page/content/markdown/nodes/columns.py +69 -0
- notionary/page/content/markdown/nodes/container.py +64 -0
- notionary/page/content/markdown/nodes/divider.py +14 -0
- notionary/page/content/markdown/nodes/embed.py +23 -0
- notionary/page/content/markdown/nodes/equation.py +19 -0
- notionary/page/content/markdown/nodes/file.py +23 -0
- notionary/page/content/markdown/nodes/heading.py +36 -0
- notionary/page/content/markdown/nodes/image.py +23 -0
- notionary/page/content/markdown/nodes/mixins/__init__.py +5 -0
- notionary/page/content/markdown/nodes/mixins/caption.py +12 -0
- notionary/page/content/markdown/nodes/numbered_list.py +38 -0
- notionary/page/content/markdown/nodes/paragraph.py +14 -0
- notionary/page/content/markdown/nodes/pdf.py +23 -0
- notionary/page/content/markdown/nodes/quote.py +27 -0
- notionary/page/content/markdown/nodes/space.py +14 -0
- notionary/page/content/markdown/nodes/table.py +45 -0
- notionary/page/content/markdown/nodes/table_of_contents.py +14 -0
- notionary/page/content/markdown/nodes/todo.py +38 -0
- notionary/page/content/markdown/nodes/toggle.py +27 -0
- notionary/page/content/markdown/nodes/video.py +23 -0
- notionary/page/content/parser/context.py +126 -0
- notionary/page/content/parser/factory.py +210 -0
- notionary/page/content/parser/parsers/__init__.py +58 -0
- notionary/page/content/parser/parsers/audio.py +40 -0
- notionary/page/content/parser/parsers/base.py +30 -0
- notionary/page/content/parser/parsers/bookmark.py +33 -0
- notionary/page/content/parser/parsers/breadcrumb.py +33 -0
- notionary/page/content/parser/parsers/bulleted_list.py +85 -0
- notionary/page/content/parser/parsers/callout.py +100 -0
- notionary/page/content/parser/parsers/caption.py +55 -0
- notionary/page/content/parser/parsers/code.py +81 -0
- notionary/page/content/parser/parsers/column.py +76 -0
- notionary/page/content/parser/parsers/column_list.py +81 -0
- notionary/page/content/parser/parsers/divider.py +33 -0
- notionary/page/content/parser/parsers/embed.py +33 -0
- notionary/page/content/parser/parsers/equation.py +65 -0
- notionary/page/content/parser/parsers/file.py +42 -0
- notionary/page/content/parser/parsers/heading.py +115 -0
- notionary/page/content/parser/parsers/image.py +42 -0
- notionary/page/content/parser/parsers/numbered_list.py +89 -0
- notionary/page/content/parser/parsers/paragraph.py +37 -0
- notionary/page/content/parser/parsers/pdf.py +42 -0
- notionary/page/content/parser/parsers/quote.py +125 -0
- notionary/page/content/parser/parsers/space.py +41 -0
- notionary/page/content/parser/parsers/table.py +144 -0
- notionary/page/content/parser/parsers/table_of_contents.py +32 -0
- notionary/page/content/parser/parsers/todo.py +96 -0
- notionary/page/content/parser/parsers/toggle.py +70 -0
- notionary/page/content/parser/parsers/video.py +42 -0
- notionary/page/content/parser/post_processing/handlers/__init__.py +5 -0
- notionary/page/content/parser/post_processing/handlers/rich_text_length.py +95 -0
- notionary/page/content/parser/post_processing/handlers/rich_text_length_truncation.py +114 -0
- notionary/page/content/parser/post_processing/port.py +9 -0
- notionary/page/content/parser/post_processing/service.py +16 -0
- notionary/page/content/parser/pre_processsing/handlers/__init__.py +11 -0
- notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +130 -0
- notionary/page/content/parser/pre_processsing/handlers/indentation.py +84 -0
- notionary/page/content/parser/pre_processsing/handlers/port.py +7 -0
- notionary/page/content/parser/pre_processsing/handlers/whitespace.py +73 -0
- notionary/page/content/parser/pre_processsing/service.py +15 -0
- notionary/page/content/parser/service.py +78 -0
- notionary/page/content/renderer/context.py +51 -0
- notionary/page/content/renderer/factory.py +231 -0
- notionary/page/content/renderer/post_processing/handlers/__init__.py +5 -0
- notionary/page/content/renderer/post_processing/handlers/numbered_list.py +156 -0
- notionary/page/content/renderer/post_processing/port.py +7 -0
- notionary/page/content/renderer/post_processing/service.py +15 -0
- notionary/page/content/renderer/renderers/__init__.py +55 -0
- notionary/page/content/renderer/renderers/audio.py +31 -0
- notionary/page/content/renderer/renderers/base.py +31 -0
- notionary/page/content/renderer/renderers/bookmark.py +25 -0
- notionary/page/content/renderer/renderers/breadcrumb.py +21 -0
- notionary/page/content/renderer/renderers/bulleted_list.py +48 -0
- notionary/page/content/renderer/renderers/callout.py +50 -0
- notionary/page/content/renderer/renderers/captioned_block.py +58 -0
- notionary/page/content/renderer/renderers/code.py +34 -0
- notionary/page/content/renderer/renderers/column.py +53 -0
- notionary/page/content/renderer/renderers/column_list.py +44 -0
- notionary/page/content/renderer/renderers/divider.py +22 -0
- notionary/page/content/renderer/renderers/embed.py +25 -0
- notionary/page/content/renderer/renderers/equation.py +37 -0
- notionary/page/content/renderer/renderers/fallback.py +24 -0
- notionary/page/content/renderer/renderers/file.py +40 -0
- notionary/page/content/renderer/renderers/heading.py +95 -0
- notionary/page/content/renderer/renderers/image.py +31 -0
- notionary/page/content/renderer/renderers/numbered_list.py +42 -0
- notionary/page/content/renderer/renderers/paragraph.py +40 -0
- notionary/page/content/renderer/renderers/pdf.py +31 -0
- notionary/page/content/renderer/renderers/quote.py +49 -0
- notionary/page/content/renderer/renderers/table.py +115 -0
- notionary/page/content/renderer/renderers/table_of_contents.py +26 -0
- notionary/page/content/renderer/renderers/table_row.py +17 -0
- notionary/page/content/renderer/renderers/todo.py +56 -0
- notionary/page/content/renderer/renderers/toggle.py +52 -0
- notionary/page/content/renderer/renderers/video.py +31 -0
- notionary/page/content/renderer/service.py +50 -0
- notionary/page/content/service.py +68 -0
- notionary/page/content/syntax/__init__.py +4 -0
- notionary/page/content/syntax/grammar.py +10 -0
- notionary/page/content/syntax/models.py +66 -0
- notionary/page/content/syntax/registry.py +393 -0
- notionary/page/page_context.py +7 -16
- notionary/page/page_http_client.py +15 -0
- notionary/page/page_metadata_update_client.py +19 -0
- notionary/page/properties/client.py +144 -0
- notionary/page/properties/factory.py +26 -0
- notionary/page/properties/models.py +308 -0
- notionary/page/properties/service.py +261 -0
- notionary/page/schemas.py +13 -0
- notionary/page/service.py +225 -0
- notionary/shared/entity/client.py +29 -0
- notionary/shared/entity/dto_parsers.py +53 -0
- notionary/shared/entity/entity_metadata_update_client.py +41 -0
- notionary/shared/entity/schemas.py +45 -0
- notionary/shared/entity/service.py +171 -0
- notionary/shared/models/cover.py +20 -0
- notionary/shared/models/file.py +21 -0
- notionary/shared/models/icon.py +28 -0
- notionary/shared/models/parent.py +41 -0
- notionary/shared/properties/type.py +30 -0
- notionary/shared/typings.py +3 -0
- notionary/user/__init__.py +4 -8
- notionary/user/base.py +138 -0
- notionary/user/bot.py +70 -0
- notionary/user/client.py +22 -111
- notionary/user/person.py +41 -0
- notionary/user/schemas.py +67 -0
- notionary/user/service.py +65 -0
- notionary/utils/date.py +51 -0
- notionary/utils/decorators.py +122 -0
- notionary/utils/fuzzy.py +68 -0
- notionary/utils/mixins/logging.py +58 -0
- notionary/utils/pagination.py +100 -0
- notionary/utils/uuid_utils.py +20 -0
- notionary/workspace/__init__.py +4 -0
- notionary/workspace/client.py +62 -0
- notionary/workspace/query/__init__.py +3 -0
- notionary/workspace/query/builder.py +60 -0
- notionary/workspace/query/models.py +61 -0
- notionary/workspace/query/service.py +100 -0
- notionary/workspace/schemas.py +21 -0
- notionary/workspace/service.py +116 -0
- notionary-0.3.0.dist-info/METADATA +201 -0
- notionary-0.3.0.dist-info/RECORD +209 -0
- {notionary-0.2.27.dist-info → notionary-0.3.0.dist-info}/WHEEL +1 -1
- {notionary-0.2.27.dist-info → notionary-0.3.0.dist-info/licenses}/LICENSE +9 -9
- notionary/base_notion_client.py +0 -219
- notionary/blocks/_bootstrap.py +0 -271
- notionary/blocks/audio/__init__.py +0 -11
- notionary/blocks/audio/audio_element.py +0 -158
- notionary/blocks/audio/audio_markdown_node.py +0 -24
- notionary/blocks/audio/audio_models.py +0 -10
- notionary/blocks/base_block_element.py +0 -42
- notionary/blocks/bookmark/__init__.py +0 -12
- notionary/blocks/bookmark/bookmark_element.py +0 -83
- notionary/blocks/bookmark/bookmark_markdown_node.py +0 -28
- notionary/blocks/bookmark/bookmark_models.py +0 -15
- notionary/blocks/breadcrumbs/__init__.py +0 -15
- notionary/blocks/breadcrumbs/breadcrumb_element.py +0 -39
- notionary/blocks/breadcrumbs/breadcrumb_markdown_node.py +0 -13
- notionary/blocks/breadcrumbs/breadcrumb_models.py +0 -12
- notionary/blocks/bulleted_list/__init__.py +0 -15
- notionary/blocks/bulleted_list/bulleted_list_element.py +0 -74
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +0 -20
- notionary/blocks/bulleted_list/bulleted_list_models.py +0 -17
- notionary/blocks/callout/__init__.py +0 -12
- notionary/blocks/callout/callout_element.py +0 -99
- notionary/blocks/callout/callout_markdown_node.py +0 -19
- notionary/blocks/callout/callout_models.py +0 -33
- notionary/blocks/child_database/__init__.py +0 -14
- notionary/blocks/child_database/child_database_element.py +0 -59
- notionary/blocks/child_database/child_database_models.py +0 -12
- notionary/blocks/child_page/__init__.py +0 -9
- notionary/blocks/child_page/child_page_element.py +0 -94
- notionary/blocks/child_page/child_page_models.py +0 -12
- notionary/blocks/code/__init__.py +0 -11
- notionary/blocks/code/code_element.py +0 -149
- notionary/blocks/code/code_markdown_node.py +0 -80
- notionary/blocks/code/code_models.py +0 -94
- notionary/blocks/column/__init__.py +0 -25
- notionary/blocks/column/column_element.py +0 -65
- notionary/blocks/column/column_list_element.py +0 -52
- notionary/blocks/column/column_list_markdown_node.py +0 -34
- notionary/blocks/column/column_markdown_node.py +0 -42
- notionary/blocks/column/column_models.py +0 -26
- notionary/blocks/divider/__init__.py +0 -12
- notionary/blocks/divider/divider_element.py +0 -41
- notionary/blocks/divider/divider_markdown_node.py +0 -11
- notionary/blocks/divider/divider_models.py +0 -12
- notionary/blocks/embed/__init__.py +0 -12
- notionary/blocks/embed/embed_element.py +0 -98
- notionary/blocks/embed/embed_markdown_node.py +0 -19
- notionary/blocks/embed/embed_models.py +0 -14
- notionary/blocks/equation/__init__.py +0 -13
- notionary/blocks/equation/equation_element.py +0 -133
- notionary/blocks/equation/equation_element_markdown_node.py +0 -23
- notionary/blocks/equation/equation_models.py +0 -11
- notionary/blocks/file/__init__.py +0 -23
- notionary/blocks/file/file_element.py +0 -133
- notionary/blocks/file/file_element_markdown_node.py +0 -24
- notionary/blocks/file/file_element_models.py +0 -39
- notionary/blocks/heading/__init__.py +0 -19
- notionary/blocks/heading/heading_element.py +0 -112
- notionary/blocks/heading/heading_markdown_node.py +0 -16
- notionary/blocks/heading/heading_models.py +0 -29
- notionary/blocks/image_block/__init__.py +0 -11
- notionary/blocks/image_block/image_element.py +0 -130
- notionary/blocks/image_block/image_markdown_node.py +0 -25
- notionary/blocks/image_block/image_models.py +0 -10
- notionary/blocks/markdown/markdown_builder.py +0 -525
- notionary/blocks/markdown/markdown_document_model.py +0 -0
- notionary/blocks/markdown/markdown_node.py +0 -25
- notionary/blocks/mixins/captions/__init__.py +0 -4
- notionary/blocks/mixins/captions/caption_markdown_node_mixin.py +0 -31
- notionary/blocks/mixins/captions/caption_mixin.py +0 -92
- notionary/blocks/mixins/file_upload/__init__.py +0 -3
- notionary/blocks/mixins/file_upload/file_upload_mixin.py +0 -320
- notionary/blocks/models.py +0 -174
- notionary/blocks/numbered_list/__init__.py +0 -16
- notionary/blocks/numbered_list/numbered_list_element.py +0 -65
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +0 -17
- notionary/blocks/numbered_list/numbered_list_models.py +0 -17
- notionary/blocks/paragraph/__init__.py +0 -15
- notionary/blocks/paragraph/paragraph_element.py +0 -58
- notionary/blocks/paragraph/paragraph_markdown_node.py +0 -16
- notionary/blocks/paragraph/paragraph_models.py +0 -16
- notionary/blocks/pdf/__init__.py +0 -11
- notionary/blocks/pdf/pdf_element.py +0 -146
- notionary/blocks/pdf/pdf_markdown_node.py +0 -24
- notionary/blocks/pdf/pdf_models.py +0 -11
- notionary/blocks/quote/__init__.py +0 -14
- notionary/blocks/quote/quote_element.py +0 -75
- notionary/blocks/quote/quote_markdown_node.py +0 -16
- notionary/blocks/quote/quote_models.py +0 -18
- notionary/blocks/registry/__init__.py +0 -3
- notionary/blocks/registry/block_registry.py +0 -150
- notionary/blocks/rich_text/__init__.py +0 -33
- notionary/blocks/rich_text/rich_text_models.py +0 -221
- notionary/blocks/rich_text/text_inline_formatter.py +0 -456
- notionary/blocks/syntax_prompt_builder.py +0 -137
- notionary/blocks/table/__init__.py +0 -19
- notionary/blocks/table/table_element.py +0 -225
- notionary/blocks/table/table_markdown_node.py +0 -42
- notionary/blocks/table/table_models.py +0 -28
- notionary/blocks/table_of_contents/__init__.py +0 -17
- notionary/blocks/table_of_contents/table_of_contents_element.py +0 -80
- notionary/blocks/table_of_contents/table_of_contents_markdown_node.py +0 -21
- notionary/blocks/table_of_contents/table_of_contents_models.py +0 -18
- notionary/blocks/todo/__init__.py +0 -12
- notionary/blocks/todo/todo_element.py +0 -81
- notionary/blocks/todo/todo_markdown_node.py +0 -21
- notionary/blocks/todo/todo_models.py +0 -18
- notionary/blocks/toggle/__init__.py +0 -12
- notionary/blocks/toggle/toggle_element.py +0 -112
- notionary/blocks/toggle/toggle_markdown_node.py +0 -31
- notionary/blocks/toggle/toggle_models.py +0 -17
- notionary/blocks/toggleable_heading/__init__.py +0 -11
- notionary/blocks/toggleable_heading/toggleable_heading_element.py +0 -115
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +0 -34
- notionary/blocks/types.py +0 -130
- notionary/blocks/video/__init__.py +0 -11
- notionary/blocks/video/video_element.py +0 -187
- notionary/blocks/video/video_element_models.py +0 -10
- notionary/blocks/video/video_markdown_node.py +0 -26
- notionary/database/__init__.py +0 -4
- notionary/database/database.py +0 -480
- notionary/database/database_filter_builder.py +0 -173
- notionary/database/database_provider.py +0 -227
- notionary/database/exceptions.py +0 -13
- notionary/database/models.py +0 -337
- notionary/database/notion_database.py +0 -487
- notionary/file_upload/__init__.py +0 -7
- notionary/page/client.py +0 -124
- notionary/page/markdown_whitespace_processor.py +0 -129
- notionary/page/models.py +0 -322
- notionary/page/notion_page.py +0 -712
- notionary/page/page_content_deleting_service.py +0 -117
- notionary/page/page_content_writer.py +0 -80
- notionary/page/property_formatter.py +0 -99
- notionary/page/reader/handler/__init__.py +0 -19
- notionary/page/reader/handler/base_block_renderer.py +0 -44
- notionary/page/reader/handler/block_processing_context.py +0 -35
- notionary/page/reader/handler/block_rendering_context.py +0 -48
- notionary/page/reader/handler/column_list_renderer.py +0 -51
- notionary/page/reader/handler/column_renderer.py +0 -60
- notionary/page/reader/handler/equation_renderer.py +0 -0
- notionary/page/reader/handler/line_renderer.py +0 -73
- notionary/page/reader/handler/numbered_list_renderer.py +0 -85
- notionary/page/reader/handler/toggle_renderer.py +0 -69
- notionary/page/reader/handler/toggleable_heading_renderer.py +0 -89
- notionary/page/reader/page_content_retriever.py +0 -81
- notionary/page/search_filter_builder.py +0 -132
- notionary/page/utils.py +0 -60
- notionary/page/writer/handler/__init__.py +0 -24
- notionary/page/writer/handler/code_handler.py +0 -72
- notionary/page/writer/handler/column_handler.py +0 -141
- notionary/page/writer/handler/column_list_handler.py +0 -139
- notionary/page/writer/handler/equation_handler.py +0 -74
- notionary/page/writer/handler/line_handler.py +0 -35
- notionary/page/writer/handler/line_processing_context.py +0 -54
- notionary/page/writer/handler/regular_line_handler.py +0 -86
- notionary/page/writer/handler/table_handler.py +0 -66
- notionary/page/writer/handler/toggle_handler.py +0 -159
- notionary/page/writer/handler/toggleable_heading_handler.py +0 -174
- notionary/page/writer/markdown_to_notion_converter.py +0 -139
- notionary/page/writer/markdown_to_notion_converter_context.py +0 -30
- notionary/page/writer/markdown_to_notion_text_length_post_processor.py +0 -0
- notionary/page/writer/notion_text_length_processor.py +0 -150
- notionary/schemas/__init__.py +0 -3
- notionary/schemas/base.py +0 -73
- notionary/shared/__init__.py +0 -3
- notionary/shared/name_to_id_resolver.py +0 -203
- notionary/telemetry/__init__.py +0 -19
- notionary/telemetry/service.py +0 -136
- notionary/telemetry/views.py +0 -73
- notionary/user/base_notion_user.py +0 -53
- notionary/user/models.py +0 -84
- notionary/user/notion_bot_user.py +0 -226
- notionary/user/notion_user.py +0 -255
- notionary/user/notion_user_manager.py +0 -101
- notionary/util/__init__.py +0 -15
- notionary/util/concurrency_limiter.py +0 -0
- notionary/util/factory_decorator.py +0 -0
- notionary/util/factory_only.py +0 -37
- notionary/util/fuzzy.py +0 -75
- notionary/util/logging_mixin.py +0 -59
- notionary/util/page_id_utils.py +0 -27
- notionary/util/singleton.py +0 -18
- notionary/util/singleton_metaclass.py +0 -22
- notionary/workspace.py +0 -105
- notionary-0.2.27.dist-info/METADATA +0 -270
- notionary-0.2.27.dist-info/RECORD +0 -202
- /notionary/{database → user}/factory.py +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Handles request limits for rich texts (see https://developers.notion.com/reference/request-limits)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, override
|
|
6
|
+
|
|
7
|
+
from notionary.blocks.rich_text.models import RichText, RichTextType
|
|
8
|
+
from notionary.blocks.schemas import BlockCreatePayload
|
|
9
|
+
from notionary.page.content.parser.post_processing.port import PostProcessor
|
|
10
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
11
|
+
|
|
12
|
+
type _NestedBlockList = BlockCreatePayload | list["_NestedBlockList"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
    """Shorten over-long rich text so created blocks respect the configured length limit.

    Every rich text object of type ``TEXT`` whose content is longer than
    ``max_text_length`` is cut and terminated with ``"..."``. Blocks are
    deep-copied before being modified, so the blocks passed in are not mutated.
    """

    NOTION_MAX_LENGTH = 2000

    def __init__(self, max_text_length: int = NOTION_MAX_LENGTH) -> None:
        """Remember the maximum number of characters allowed per rich text content."""
        self.max_text_length = max_text_length

    @override
    def process(self, blocks: list[BlockCreatePayload]) -> list[BlockCreatePayload]:
        """Return flattened, truncated copies of ``blocks``.

        An empty input is returned unchanged. Nested lists inside ``blocks``
        are flattened into a single list before each block is processed.
        """
        if not blocks:
            return blocks

        flattened_blocks = self._flatten_blocks(blocks)
        return [self._process_block(block) for block in flattened_blocks]

    def _flatten_blocks(self, blocks: list[_NestedBlockList]) -> list[BlockCreatePayload]:
        """Recursively flatten arbitrarily nested lists of blocks, preserving order."""
        flattened: list[BlockCreatePayload] = []

        for item in blocks:
            if isinstance(item, list):
                flattened.extend(self._flatten_blocks(item))
            else:
                flattened.append(item)

        return flattened

    def _process_block(self, block: BlockCreatePayload) -> BlockCreatePayload:
        """Deep-copy ``block`` and truncate the rich text found in the copy."""
        block_copy = block.model_copy(deep=True)
        content = self._get_block_content(block_copy)

        if content is not None:
            self._truncate_content(content)

        return block_copy

    def _get_block_content(self, block: BlockCreatePayload) -> Any | None:
        """Return the type-specific content object of ``block`` if it may hold rich text.

        The content is looked up under the attribute named after ``block.type.value``.
        ``None`` is returned when that attribute is missing or exposes none of
        ``rich_text``, ``caption`` or ``children``.
        """
        content = getattr(block, block.type.value, None)

        if content is None:
            return None

        # ``caption`` is part of the gate so that content carrying only a caption
        # still reaches the caption handling in ``_truncate_content``.
        if any(hasattr(content, field) for field in ("rich_text", "caption", "children")):
            return content

        return None

    def _truncate_content(self, content: object) -> None:
        """Truncate ``rich_text`` and ``caption`` of ``content``, then recurse into its children."""
        self._truncate_rich_text_list(getattr(content, "rich_text", None))
        self._truncate_rich_text_list(getattr(content, "caption", None))

        for child in getattr(content, "children", None) or []:
            child_content = self._get_block_content(child)
            if child_content is not None:
                self._truncate_content(child_content)

    def _truncate_rich_text_list(self, rich_text_list: list[RichText] | None) -> None:
        """Truncate, in place, every text-type entry that exceeds ``max_text_length``.

        A missing or empty list is ignored instead of raising on iteration.
        """
        if not rich_text_list:
            return

        for rich_text in rich_text_list:
            if not self._is_text_type(rich_text):
                continue

            content = rich_text.text.content
            if len(content) > self.max_text_length:
                self.logger.warning(
                    "Truncating text content from %d to %d characters",
                    len(content),
                    self.max_text_length,
                )
                truncated_content = self._create_truncated_text_with_ellipsis(content)
                rich_text.text.content = truncated_content

    def _create_truncated_text_with_ellipsis(self, content: str) -> str:
        """Return ``content`` cut so that, with a trailing ``"..."``, it fits ``max_text_length``.

        When the limit is too small to hold the ellipsis, the text is hard-cut
        without one; a negative slice bound would otherwise cut from the wrong
        end and could exceed the limit.
        """
        ellipsis = "..."
        if self.max_text_length < len(ellipsis):
            return content[: max(self.max_text_length, 0)]

        cutoff = self.max_text_length - len(ellipsis)
        return content[:cutoff] + ellipsis

    def _is_text_type(self, rich_text: RichText) -> bool:
        """Tell whether ``rich_text`` is a ``TEXT`` object with non-empty content."""
        # bool(...) keeps the declared return type; the bare and-chain would
        # return the content string (or the text object) itself.
        return bool(rich_text.type == RichTextType.TEXT and rich_text.text and rich_text.text.content)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Handles request limits for rich texts (see https://developers.notion.com/reference/request-limits)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, override
|
|
6
|
+
|
|
7
|
+
from notionary.blocks.rich_text.models import RichText, RichTextType
|
|
8
|
+
from notionary.blocks.schemas import BlockCreatePayload
|
|
9
|
+
from notionary.page.content.parser.post_processing.port import PostProcessor
|
|
10
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
11
|
+
|
|
12
|
+
type _NestedBlockList = BlockCreatePayload | list["_NestedBlockList"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RichTextLengthTruncationPostProcessor(PostProcessor, LoggingMixin):
    """Shorten over-long rich text so created blocks respect the configured length limit.

    Every rich text object of type ``TEXT`` whose content is longer than
    ``max_text_length`` is cut and terminated with ``ELLIPSIS``. Blocks are
    deep-copied before being modified, so the blocks passed in are not mutated.
    """

    NOTION_MAX_LENGTH = 2000
    ELLIPSIS = "..."

    def __init__(self, max_text_length: int = NOTION_MAX_LENGTH) -> None:
        """Remember the maximum number of characters allowed per rich text content."""
        self.max_text_length = max_text_length

    @override
    def process(self, blocks: list[BlockCreatePayload]) -> list[BlockCreatePayload]:
        """Return flattened, truncated copies of ``blocks``.

        An empty input is returned unchanged. Nested lists inside ``blocks``
        are flattened into a single list before each block is processed.
        """
        if not blocks:
            return blocks

        flattened_blocks = self._flatten_blocks(blocks)
        return [self._process_block(block) for block in flattened_blocks]

    def _flatten_blocks(self, blocks: list[_NestedBlockList]) -> list[BlockCreatePayload]:
        """Recursively flatten arbitrarily nested lists of blocks, preserving order."""
        flattened: list[BlockCreatePayload] = []

        for item in blocks:
            if isinstance(item, list):
                flattened.extend(self._flatten_blocks(item))
            else:
                flattened.append(item)

        return flattened

    def _process_block(self, block: BlockCreatePayload) -> BlockCreatePayload:
        """Deep-copy ``block`` and truncate the rich text found in the copy."""
        block_copy = block.model_copy(deep=True)
        content = self._get_block_content(block_copy)

        if content is not None:
            self._truncate_content(content)

        return block_copy

    def _get_block_content(self, block: BlockCreatePayload) -> Any | None:
        """Return the type-specific content object of ``block`` if it may hold rich text.

        The content is looked up under the attribute named after ``block.type.value``.
        ``None`` is returned when that attribute is missing or exposes none of
        ``rich_text``, ``caption`` or ``children``.
        """
        content = getattr(block, block.type.value, None)

        if content is None:
            return None

        # ``caption`` is part of the gate so that content carrying only a caption
        # still reaches ``_truncate_rich_text_fields``.
        if any(hasattr(content, field) for field in ("rich_text", "caption", "children")):
            return content

        return None

    def _truncate_content(self, content: object) -> None:
        """Truncate the rich text fields of ``content`` and then those of its children."""
        self._truncate_rich_text_fields(content)
        self._truncate_children_recursively(content)

    def _truncate_rich_text_fields(self, content: object) -> None:
        """Truncate the ``rich_text`` and ``caption`` lists of ``content`` when present."""
        self._truncate_rich_text_list(getattr(content, "rich_text", None))
        self._truncate_rich_text_list(getattr(content, "caption", None))

    def _truncate_children_recursively(self, content: object) -> None:
        """Apply truncation to every child block listed under ``content.children``."""
        for child in getattr(content, "children", None) or []:
            self._truncate_child_content(child)

    def _truncate_child_content(self, child: Any) -> None:
        """Truncate the content of a single child block, if it has any relevant content."""
        child_content = self._get_block_content(child)
        if child_content is not None:
            self._truncate_content(child_content)

    def _truncate_rich_text_list(self, rich_text_list: list[RichText] | None) -> None:
        """Truncate, in place, every entry of the list that exceeds the limit.

        A missing or empty list is ignored instead of raising on iteration.
        """
        if not rich_text_list:
            return

        for rich_text in rich_text_list:
            if self._should_truncate(rich_text):
                self._truncate_single_rich_text(rich_text)

    def _should_truncate(self, rich_text: RichText) -> bool:
        """Tell whether ``rich_text`` is text-typed and longer than ``max_text_length``."""
        if not self._is_text_type(rich_text):
            return False

        return len(rich_text.text.content) > self.max_text_length

    def _truncate_single_rich_text(self, rich_text: RichText) -> None:
        """Replace the content of ``rich_text`` with its truncated form and log a warning."""
        original_length = len(rich_text.text.content)
        rich_text.text.content = self._create_truncated_text(rich_text.text.content)

        self.logger.warning(
            "Truncating text content from %d to %d characters",
            original_length,
            self.max_text_length,
        )

    def _create_truncated_text(self, content: str) -> str:
        """Return ``content`` cut so that, with ``ELLIPSIS`` appended, it fits ``max_text_length``.

        When the limit is too small to hold the ellipsis, the text is hard-cut
        without one; a negative slice bound would otherwise cut from the wrong
        end and could exceed the limit.
        """
        if self.max_text_length < len(self.ELLIPSIS):
            return content[: max(self.max_text_length, 0)]

        cutoff = self.max_text_length - len(self.ELLIPSIS)
        return content[:cutoff] + self.ELLIPSIS

    def _is_text_type(self, rich_text: RichText) -> bool:
        """Tell whether ``rich_text`` is a ``TEXT`` object with non-empty content."""
        # bool(...) keeps the declared return type; the bare and-chain would
        # return the content string itself.
        return bool(rich_text.type == RichTextType.TEXT and rich_text.text is not None and rich_text.text.content)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from notionary.blocks.schemas import BlockCreatePayload
|
|
2
|
+
from notionary.page.content.parser.post_processing.port import PostProcessor
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BlockPostProcessor:
    """Run registered post-processors over created blocks as a pipeline.

    Processors are applied in registration order; each one receives the
    output of the previous one.
    """

    def __init__(self) -> None:
        """Start with an empty processor chain."""
        self._processors: list[PostProcessor] = []

    def register(self, processor: PostProcessor) -> None:
        """Append ``processor`` to the end of the chain."""
        self._processors.append(processor)

    def process(self, created_blocks: list[BlockCreatePayload]) -> list[BlockCreatePayload]:
        """Pass ``created_blocks`` through every registered processor and return the result.

        With no processors registered, the input list is returned as is.
        """
        result = created_blocks
        for processor in self._processors:
            # Feed the running result forward. Passing ``created_blocks`` here
            # would discard the work of every processor except the last one.
            result = processor.process(result)
        return result
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .column_syntax import ColumnSyntaxPreProcessor
|
|
2
|
+
from .indentation import IndentationNormalizer
|
|
3
|
+
from .port import PreProcessor
|
|
4
|
+
from .whitespace import WhitespacePreProcessor
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"ColumnSyntaxPreProcessor",
|
|
8
|
+
"IndentationNormalizer",
|
|
9
|
+
"PreProcessor",
|
|
10
|
+
"WhitespacePreProcessor",
|
|
11
|
+
]
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import override
|
|
3
|
+
|
|
4
|
+
from notionary.exceptions.block_parsing import InsufficientColumnsError, InvalidColumnRatioSumError
|
|
5
|
+
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
6
|
+
from notionary.page.content.syntax import MarkdownGrammar, SyntaxRegistry
|
|
7
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
8
|
+
|
|
9
|
+
RATIO_TOLERANCE = 0.0001
|
|
10
|
+
MINIMUM_COLUMNS = 2
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
    """Validates column-list syntax in markdown; the text itself is never changed.

    For every line that equals the column-list delimiter, the block that follows
    is checked for a minimum number of columns and, when every column carries an
    explicit ratio, for the ratios summing to 1.0 within ``RATIO_TOLERANCE``.
    """

    def __init__(
        self, syntax_registry: SyntaxRegistry | None = None, markdown_grammar: MarkdownGrammar | None = None
    ) -> None:
        super().__init__()
        self._syntax_registry = syntax_registry or SyntaxRegistry()
        self._markdown_grammar = markdown_grammar or MarkdownGrammar()

        self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
        self._column_list_delimiter = self._syntax_registry.get_column_list_syntax().start_delimiter
        self._column_delimiter = self._syntax_registry.get_column_syntax().start_delimiter
        self._column_pattern = self._syntax_registry.get_column_syntax().regex_pattern

    @override
    def process(self, markdown_text: str) -> str:
        """Validate all column lists in ``markdown_text`` and return it unchanged.

        Raises:
            InsufficientColumnsError: A column list has fewer than ``MINIMUM_COLUMNS`` columns.
            InvalidColumnRatioSumError: All columns have explicit ratios that do not sum to 1.0.
        """
        if self._contains_column_lists(markdown_text):
            self._validate_all_column_lists(markdown_text)
        return markdown_text

    def _contains_column_lists(self, markdown_text: str) -> bool:
        """Cheap substring check that lets ``process`` skip texts without column lists."""
        return self._column_list_delimiter in markdown_text

    def _validate_all_column_lists(self, markdown_text: str) -> None:
        """Validate each extracted column-list block in document order."""
        for column_list in self._extract_column_list_blocks(markdown_text):
            self._validate_column_list_block(column_list)

    def _extract_column_list_blocks(self, markdown_text: str) -> list[str]:
        """Return the block following every column-list delimiter line."""
        lines = markdown_text.split("\n")
        return [
            self._extract_indented_block(lines, position + 1)
            for position, line in enumerate(lines)
            if self._is_column_list_start(line)
        ]

    def _is_column_list_start(self, line: str) -> bool:
        """A line starts a column list when, stripped, it equals the delimiter."""
        return line.strip() == self._column_list_delimiter

    def _extract_indented_block(self, lines: list[str], start_index: int) -> str:
        """Collect lines from ``start_index`` until one is indented less than the first.

        The first line's indentation level is the baseline. Empty lines are kept
        verbatim; other lines have the baseline indentation sliced off.
        """
        if start_index >= len(lines):
            return ""

        base_level = self._get_indentation_level(lines[start_index])
        strip_width = base_level * self._spaces_per_nesting_level
        collected = []

        for candidate in lines[start_index:]:
            if not self._is_empty_line(candidate):
                if self._get_indentation_level(candidate) < base_level:
                    break
                if len(candidate) >= strip_width:
                    candidate = candidate[strip_width:]
            collected.append(candidate)

        return "\n".join(collected)

    def _is_empty_line(self, line: str) -> bool:
        """True for lines that are empty or whitespace only."""
        return not line.strip()

    def _get_indentation_level(self, line: str) -> int:
        """Leading whitespace count floor-divided by the spaces per nesting level."""
        return (len(line) - len(line.lstrip())) // self._spaces_per_nesting_level

    def _validate_column_list_block(self, block_content: str) -> None:
        """Check the column count first, then the ratio sum of one block."""
        matches = self._find_all_columns(block_content)
        total_columns = len(matches)

        self._validate_minimum_column_count(total_columns)
        self._validate_ratio_sum(self._extract_column_ratios(matches), total_columns)

    def _find_all_columns(self, content: str) -> list[re.Match]:
        """All matches of the column pattern inside ``content``."""
        return [*self._column_pattern.finditer(content)]

    def _validate_minimum_column_count(self, column_count: int) -> None:
        """Log and raise when the block holds fewer than ``MINIMUM_COLUMNS`` columns."""
        if column_count >= MINIMUM_COLUMNS:
            return
        self.logger.error(f"Column list must contain at least {MINIMUM_COLUMNS} columns, found {column_count}")
        raise InsufficientColumnsError(column_count)

    def _extract_column_ratios(self, column_matches: list[re.Match]) -> list[float]:
        """Convert the first capture group of each match to float, skipping implicit ratios."""
        captured = (match.group(1) for match in column_matches)
        return [float(text) for text in captured if self._has_explicit_ratio(text)]

    def _has_explicit_ratio(self, ratio_text: str | None) -> bool:
        """A ratio counts as explicit unless it is missing or the literal ``"1"``."""
        return not (ratio_text is None or ratio_text == "1")

    def _validate_ratio_sum(self, ratios: list[float], column_count: int) -> None:
        """Log and raise when fully specified ratios do not sum to 1.0 within tolerance."""
        if not self._should_validate_ratios(ratios, column_count):
            return

        total_ratio = sum(ratios)
        if self._is_ratio_sum_valid(total_ratio):
            return

        self.logger.error(f"Column ratios must sum to 1.0 (±{RATIO_TOLERANCE}), but sum to {total_ratio:.4f}")
        raise InvalidColumnRatioSumError(total_ratio, RATIO_TOLERANCE)

    def _should_validate_ratios(self, ratios: list[float], column_count: int) -> bool:
        """Only validate when at least one ratio exists and every column has one."""
        explicit_count = len(ratios)
        return 0 < explicit_count == column_count

    def _is_ratio_sum_valid(self, total: float) -> bool:
        """True when ``total`` lies within ``RATIO_TOLERANCE`` of 1.0."""
        return -RATIO_TOLERANCE <= total - 1.0 <= RATIO_TOLERANCE
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import override
|
|
3
|
+
|
|
4
|
+
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
5
|
+
from notionary.page.content.syntax import MarkdownGrammar, SyntaxRegistry
|
|
6
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class IndentationNormalizer(PreProcessor, LoggingMixin):
    """Rewrites each line's leading whitespace to a whole number of nesting levels.

    Code-fence lines and everything between them are passed through untouched.
    Blank lines become empty strings. A warning is logged whenever the output
    differs from the input.
    """

    def __init__(
        self, syntax_registry: SyntaxRegistry | None = None, markdown_grammar: MarkdownGrammar | None = None
    ) -> None:
        super().__init__()
        self._syntax_registry = syntax_registry or SyntaxRegistry()
        self._markdown_grammar = markdown_grammar or MarkdownGrammar()

        self._spaces_per_nesting_level = self._markdown_grammar.spaces_per_nesting_level
        self._code_block_start_delimiter = self._syntax_registry.get_code_syntax().start_delimiter

    @override
    def process(self, markdown_text: str) -> str:
        """Return ``markdown_text`` with normalized indentation (``""`` for empty input)."""
        if self._is_empty(markdown_text):
            return ""

        normalized = self._normalize_to_markdown_indentation(markdown_text)
        changed = normalized != markdown_text

        if changed:
            self.logger.warning(
                "Corrected non-standard indentation. Check the result for formatting errors and use consistent indentation in the source."
            )

        return normalized

    def _is_empty(self, text: str) -> bool:
        """True when ``text`` is falsy."""
        return not text

    def _normalize_to_markdown_indentation(self, markdown_text: str) -> str:
        """Normalize every line outside fenced code blocks and rejoin with newlines."""
        output = []
        in_code = False

        for raw_line in markdown_text.split("\n"):
            is_fence = self._is_code_fence(raw_line)
            if is_fence:
                # Every fence line flips the state, whether opening or closing.
                in_code = not in_code

            if is_fence or in_code:
                output.append(raw_line)
            else:
                output.append(self._normalize_to_standard_indentation(raw_line))

        return "\n".join(output)

    def _is_code_fence(self, line: str) -> bool:
        """True when the line, ignoring leading whitespace, starts with the code delimiter."""
        return line.lstrip().startswith(self._code_block_start_delimiter)

    def _normalize_to_standard_indentation(self, line: str) -> str:
        """Re-indent one line; whitespace-only lines collapse to ``""``."""
        if self._is_blank_line(line):
            return ""

        return self._build_indented_line(
            self._round_to_nearest_indentation_level(line),
            self._extract_content(line),
        )

    def _is_blank_line(self, line: str) -> bool:
        """True for empty or whitespace-only lines."""
        return not line.strip()

    def _round_to_nearest_indentation_level(self, line: str) -> int:
        """Leading whitespace count divided by spaces per level, rounded up (``math.ceil``)."""
        return math.ceil(self._count_leading_spaces(line) / self._spaces_per_nesting_level)

    def _count_leading_spaces(self, line: str) -> int:
        """Number of leading whitespace characters removed by ``str.lstrip``."""
        stripped = line.lstrip()
        return len(line) - len(stripped)

    def _extract_content(self, line: str) -> str:
        """The line without its leading whitespace."""
        return line.lstrip()

    def _build_indented_line(self, level: int, content: str) -> str:
        """Prefix ``content`` with the standard indent for ``level``."""
        return f"{self._create_standard_indent(level)}{content}"

    def _create_standard_indent(self, level: int) -> str:
        """A run of spaces ``level * spaces_per_nesting_level`` long."""
        return " " * (level * self._spaces_per_nesting_level)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import override
|
|
2
|
+
|
|
3
|
+
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class WhitespacePreProcessor(PreProcessor):
|
|
7
|
+
@override
|
|
8
|
+
def process(self, markdown_text: str) -> str:
|
|
9
|
+
if not markdown_text:
|
|
10
|
+
return ""
|
|
11
|
+
|
|
12
|
+
lines = markdown_text.split("\n")
|
|
13
|
+
processed_lines = []
|
|
14
|
+
code_block_lines = []
|
|
15
|
+
non_code_lines = []
|
|
16
|
+
in_code_block = False
|
|
17
|
+
|
|
18
|
+
for line in lines:
|
|
19
|
+
if self._is_code_fence(line):
|
|
20
|
+
if in_code_block:
|
|
21
|
+
# Format and add code block
|
|
22
|
+
processed_lines.extend(self._format_code_block(code_block_lines))
|
|
23
|
+
processed_lines.append("```")
|
|
24
|
+
code_block_lines = []
|
|
25
|
+
in_code_block = False
|
|
26
|
+
else:
|
|
27
|
+
# Format accumulated non-code lines before starting code block
|
|
28
|
+
if non_code_lines:
|
|
29
|
+
processed_lines.extend(self._format_code_block(non_code_lines))
|
|
30
|
+
non_code_lines = []
|
|
31
|
+
|
|
32
|
+
language = self._extract_language(line)
|
|
33
|
+
processed_lines.append(f"```{language}")
|
|
34
|
+
in_code_block = True
|
|
35
|
+
elif in_code_block:
|
|
36
|
+
code_block_lines.append(line)
|
|
37
|
+
else:
|
|
38
|
+
non_code_lines.append(line)
|
|
39
|
+
|
|
40
|
+
# Format remaining non-code lines at the end
|
|
41
|
+
if non_code_lines:
|
|
42
|
+
processed_lines.extend(self._format_code_block(non_code_lines))
|
|
43
|
+
|
|
44
|
+
return "\n".join(processed_lines)
|
|
45
|
+
|
|
46
|
+
def _is_code_fence(self, line: str) -> bool:
|
|
47
|
+
return line.lstrip().startswith("```")
|
|
48
|
+
|
|
49
|
+
def _extract_language(self, fence_line: str) -> str:
|
|
50
|
+
return fence_line.lstrip().removeprefix("```").strip()
|
|
51
|
+
|
|
52
|
+
def _format_code_block(self, lines: list[str]) -> list[str]:
|
|
53
|
+
if not lines:
|
|
54
|
+
return []
|
|
55
|
+
|
|
56
|
+
non_empty_lines = [line for line in lines if line.strip()]
|
|
57
|
+
if not non_empty_lines:
|
|
58
|
+
return ["" for _ in lines]
|
|
59
|
+
|
|
60
|
+
min_indent = min(self._count_leading_spaces(line) for line in non_empty_lines)
|
|
61
|
+
|
|
62
|
+
if min_indent == 0:
|
|
63
|
+
return lines
|
|
64
|
+
|
|
65
|
+
return [self._remove_indent(line, min_indent) for line in lines]
|
|
66
|
+
|
|
67
|
+
def _count_leading_spaces(self, line: str) -> int:
|
|
68
|
+
return len(line) - len(line.lstrip())
|
|
69
|
+
|
|
70
|
+
def _remove_indent(self, line: str, indent_size: int) -> str:
|
|
71
|
+
if not line.strip():
|
|
72
|
+
return ""
|
|
73
|
+
return line[indent_size:]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MarkdownPreProcessor:
    """Chains registered pre-processors over markdown text in registration order."""

    def __init__(self) -> None:
        self._processors: list[PreProcessor] = []

    def register(self, processor: PreProcessor) -> None:
        """Add ``processor`` as the last stage of the chain."""
        self._processors.append(processor)

    def process(self, markdown_text: str) -> str:
        """Pass ``markdown_text`` through every stage and return the final text.

        Each stage receives the output of the stage before it. With no stages
        registered the input is returned as is.
        """
        for stage in self._processors:
            markdown_text = stage.process(markdown_text)
        return markdown_text
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from notionary.blocks.schemas import BlockCreatePayload
|
|
2
|
+
from notionary.page.content.parser.parsers import (
|
|
3
|
+
BlockParsingContext,
|
|
4
|
+
ParentBlockContext,
|
|
5
|
+
)
|
|
6
|
+
from notionary.page.content.parser.parsers.base import LineParser
|
|
7
|
+
from notionary.page.content.parser.post_processing.service import BlockPostProcessor
|
|
8
|
+
from notionary.page.content.parser.pre_processsing.service import MarkdownPreProcessor
|
|
9
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MarkdownToNotionConverter(LoggingMixin):
    """Turns markdown text into block payloads: pre-process, parse line by line, post-process."""

    def __init__(
        self, line_parser: LineParser, pre_processor: MarkdownPreProcessor, post_processor: BlockPostProcessor
    ) -> None:
        self._line_parser = line_parser
        self._pre_processor = pre_processor
        self._post_processor = post_processor

    async def convert(self, markdown_text: str) -> list[BlockCreatePayload]:
        """Convert ``markdown_text`` to blocks; falsy input yields an empty list."""
        if not markdown_text:
            return []

        prepared_text = self._pre_processor.process(markdown_text)
        parsed_blocks = await self._process_lines(prepared_text)
        return self._post_processor.process(parsed_blocks)

    async def _process_lines(self, text: str) -> list[BlockCreatePayload]:
        """Feed each line of ``text`` to the line parser and return the collected blocks.

        The parser receives a fresh context per line that shares ``result_blocks``
        and ``parent_stack``. It may report extra consumed lines through
        ``context.lines_consumed``, which are then skipped.
        """
        lines = text.split("\n")
        result_blocks: list[BlockCreatePayload] = []
        parent_stack: list[ParentBlockContext] = []

        cursor = 0
        saw_empty_line = False

        while cursor < len(lines):
            current = lines[cursor]
            context = self._create_line_processing_context(
                line=current,
                lines=lines,
                line_index=cursor,
                result_blocks=result_blocks,
                parent_stack=parent_stack,
                is_previous_line_empty=saw_empty_line,
            )

            await self._line_parser.handle(context)

            # Emptiness is tracked for the line handed to the parser, not for
            # any additional lines it consumed.
            saw_empty_line = self._is_processed_line_empty(current)
            cursor += context.lines_consumed + 1

        return result_blocks

    def _create_line_processing_context(
        self,
        line: str,
        lines: list[str],
        line_index: int,
        result_blocks: list[BlockCreatePayload],
        parent_stack: list[ParentBlockContext],
        is_previous_line_empty: bool = False,
    ) -> BlockParsingContext:
        """Build the parsing context for one line, with ``lines_consumed`` starting at 0."""
        return BlockParsingContext(
            line=line,
            result_blocks=result_blocks,
            parent_stack=parent_stack,
            # Passed to the context so child content can be parsed with this same routine.
            parse_children_callback=self._process_lines,
            all_lines=lines,
            current_line_index=line_index,
            lines_consumed=0,
            is_previous_line_empty=is_previous_line_empty,
        )

    def _is_processed_line_empty(self, line: str) -> bool:
        """True when ``line`` is empty or whitespace only."""
        return not line.strip()
|