notionary 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +14 -2
- notionary/blocks/enums.py +27 -6
- notionary/blocks/schemas.py +32 -78
- notionary/comments/client.py +6 -9
- notionary/comments/schemas.py +2 -29
- notionary/data_source/http/data_source_instance_client.py +4 -4
- notionary/data_source/properties/schemas.py +128 -107
- notionary/data_source/query/__init__.py +9 -0
- notionary/data_source/query/builder.py +12 -3
- notionary/data_source/query/schema.py +5 -0
- notionary/data_source/schemas.py +2 -2
- notionary/data_source/service.py +43 -132
- notionary/database/schemas.py +2 -2
- notionary/database/service.py +19 -63
- notionary/exceptions/__init__.py +10 -2
- notionary/exceptions/api.py +2 -2
- notionary/exceptions/base.py +1 -1
- notionary/exceptions/block_parsing.py +24 -3
- notionary/exceptions/data_source/builder.py +2 -2
- notionary/exceptions/data_source/properties.py +3 -3
- notionary/exceptions/file_upload.py +67 -0
- notionary/exceptions/properties.py +4 -4
- notionary/exceptions/search.py +4 -4
- notionary/file_upload/__init__.py +4 -0
- notionary/file_upload/client.py +124 -210
- notionary/file_upload/config/__init__.py +17 -0
- notionary/file_upload/config/config.py +32 -0
- notionary/file_upload/config/constants.py +16 -0
- notionary/file_upload/file/reader.py +28 -0
- notionary/file_upload/query/__init__.py +7 -0
- notionary/file_upload/query/builder.py +54 -0
- notionary/file_upload/query/models.py +37 -0
- notionary/file_upload/schemas.py +78 -0
- notionary/file_upload/service.py +152 -289
- notionary/file_upload/validation/factory.py +64 -0
- notionary/file_upload/validation/impl/file_name_length.py +23 -0
- notionary/file_upload/validation/models.py +124 -0
- notionary/file_upload/validation/port.py +7 -0
- notionary/file_upload/validation/service.py +17 -0
- notionary/file_upload/validation/validators/__init__.py +11 -0
- notionary/file_upload/validation/validators/file_exists.py +15 -0
- notionary/file_upload/validation/validators/file_extension.py +122 -0
- notionary/file_upload/validation/validators/file_name_length.py +21 -0
- notionary/file_upload/validation/validators/upload_limit.py +31 -0
- notionary/http/client.py +7 -23
- notionary/page/content/factory.py +2 -0
- notionary/page/content/parser/factory.py +8 -5
- notionary/page/content/parser/parsers/audio.py +8 -33
- notionary/page/content/parser/parsers/embed.py +0 -2
- notionary/page/content/parser/parsers/file.py +8 -35
- notionary/page/content/parser/parsers/file_like_block.py +89 -0
- notionary/page/content/parser/parsers/image.py +8 -35
- notionary/page/content/parser/parsers/pdf.py +8 -35
- notionary/page/content/parser/parsers/video.py +8 -35
- notionary/page/content/parser/pre_processsing/handlers/__init__.py +2 -0
- notionary/page/content/parser/pre_processsing/handlers/column_syntax.py +12 -8
- notionary/page/content/parser/pre_processsing/handlers/indentation.py +2 -0
- notionary/page/content/parser/pre_processsing/handlers/video_syntax.py +66 -0
- notionary/page/content/parser/pre_processsing/handlers/whitespace.py +2 -0
- notionary/page/content/renderer/renderers/audio.py +9 -21
- notionary/page/content/renderer/renderers/file.py +9 -21
- notionary/page/content/renderer/renderers/file_like_block.py +43 -0
- notionary/page/content/renderer/renderers/image.py +9 -21
- notionary/page/content/renderer/renderers/pdf.py +9 -21
- notionary/page/content/renderer/renderers/video.py +9 -21
- notionary/page/content/syntax/__init__.py +2 -1
- notionary/page/content/syntax/registry.py +38 -60
- notionary/page/properties/client.py +3 -3
- notionary/page/properties/{models.py → schemas.py} +93 -107
- notionary/page/properties/service.py +15 -4
- notionary/page/schemas.py +3 -3
- notionary/page/service.py +18 -79
- notionary/shared/entity/dto_parsers.py +1 -36
- notionary/shared/entity/entity_metadata_update_client.py +18 -4
- notionary/shared/entity/schemas.py +6 -6
- notionary/shared/entity/service.py +121 -40
- notionary/shared/models/file.py +34 -6
- notionary/shared/models/icon.py +5 -12
- notionary/user/bot.py +12 -12
- notionary/utils/decorators.py +8 -8
- notionary/utils/pagination.py +36 -32
- notionary/workspace/__init__.py +2 -2
- notionary/workspace/client.py +2 -0
- notionary/workspace/query/__init__.py +3 -2
- notionary/workspace/query/builder.py +25 -1
- notionary/workspace/query/models.py +9 -1
- notionary/workspace/query/service.py +15 -11
- notionary/workspace/service.py +46 -36
- {notionary-0.3.0.dist-info → notionary-0.4.0.dist-info}/METADATA +9 -5
- {notionary-0.3.0.dist-info → notionary-0.4.0.dist-info}/RECORD +92 -71
- notionary/file_upload/models.py +0 -69
- notionary/page/page_context.py +0 -50
- notionary/shared/models/cover.py +0 -20
- {notionary-0.3.0.dist-info → notionary-0.4.0.dist-info}/WHEEL +0 -0
- {notionary-0.3.0.dist-info → notionary-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Generic, TypeVar, override
|
|
4
|
+
|
|
5
|
+
from notionary.blocks.schemas import (
|
|
6
|
+
ExternalFileWithCaption,
|
|
7
|
+
FileUploadFileWithCaption,
|
|
8
|
+
FileWithCaption,
|
|
9
|
+
)
|
|
10
|
+
from notionary.exceptions.file_upload import UploadFailedError, UploadTimeoutError
|
|
11
|
+
from notionary.file_upload.service import NotionFileUpload
|
|
12
|
+
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
13
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
14
|
+
from notionary.page.content.syntax.models import SyntaxDefinition
|
|
15
|
+
from notionary.shared.models.file import ExternalFileData, FileUploadedFileData
|
|
16
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
17
|
+
|
|
18
|
+
_TBlock = TypeVar("_TBlock")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FileLikeBlockParser(LineParser, LoggingMixin, Generic[_TBlock]):
    """Shared line parser for blocks that point at a file by URL or local path.

    Subclasses supply the syntax definition to match and the concrete block to
    build. The matched target is either wrapped as an external file reference
    or passed to the file upload service and referenced by the returned upload id.
    Any failure while handling a line is logged and the block is skipped.
    """

    # Scheme prefixes that mark a target as already hosted (compared case-insensitively).
    _EXTERNAL_URL_PREFIXES = ("http://", "https://", "data:")

    def __init__(self, syntax_registry: SyntaxRegistry, file_upload_service: NotionFileUpload | None = None) -> None:
        """Store the subclass-specific syntax and the upload service.

        Args:
            syntax_registry: Registry passed to the base parser and to ``_get_syntax``.
            file_upload_service: Service used for local files; a default
                ``NotionFileUpload()`` is created when omitted.
        """
        super().__init__(syntax_registry)
        self._syntax = self._get_syntax(syntax_registry)
        self._file_upload_service = file_upload_service or NotionFileUpload()

    @abstractmethod
    def _get_syntax(self, syntax_registry: SyntaxRegistry) -> SyntaxDefinition:
        """Return the syntax definition whose regex identifies this block type."""

    @abstractmethod
    def _create_block(self, file_data: FileWithCaption) -> _TBlock:
        """Build the concrete block from the resolved file data."""

    @override
    def _can_handle(self, context: BlockParsingContext) -> bool:
        """Return True when the line matches the syntax and is not inside a parent context."""
        if context.is_inside_parent_context():
            return False
        return self._syntax.regex_pattern.search(context.line) is not None

    @override
    async def _process(self, context: BlockParsingContext) -> None:
        """Resolve the target on the current line and append the resulting block.

        Nothing is appended when no target can be extracted or when resolving,
        uploading or block creation fails; failures are logged as warnings.
        """
        path_or_url = self._extract_path_or_url(context.line)
        if not path_or_url:
            return

        try:
            if self._is_external_url(path_or_url):
                file_data = ExternalFileWithCaption(external=ExternalFileData(url=path_or_url))
            else:
                file_data = await self._upload_local_file(path_or_url)

            context.result_blocks.append(self._create_block(file_data))

        except FileNotFoundError:
            self.logger.warning("File not found: '%s' - skipping block", path_or_url)
        except PermissionError:
            self.logger.warning("No permission to read file: '%s' - skipping block", path_or_url)
        except IsADirectoryError:
            self.logger.warning("Path is a directory, not a file: '%s' - skipping block", path_or_url)
        except (UploadFailedError, UploadTimeoutError) as e:
            self.logger.warning("Upload failed for '%s': %s - skipping block", path_or_url, e)
        except OSError as e:
            self.logger.warning("IO error reading file '%s': %s - skipping block", path_or_url, e)
        except Exception as e:
            # Still best-effort, but keep the traceback: an unexpected error
            # here is most likely a bug and is hard to diagnose from the message alone.
            self.logger.warning(
                "Unexpected error processing file '%s': %s - skipping block", path_or_url, e, exc_info=True
            )

    def _extract_path_or_url(self, line: str) -> str | None:
        """Return the stripped first capture group of the syntax regex, or None if the line does not match."""
        match = self._syntax.regex_pattern.search(line)
        return match.group(1).strip() if match else None

    def _is_external_url(self, path_or_url: str) -> bool:
        """Return True when the target should be referenced as-is instead of uploaded.

        URI schemes are case-insensitive, so the prefix check is done on a
        lower-cased copy; the caller still passes the original string on.
        """
        if path_or_url.lower().startswith(self._EXTERNAL_URL_PREFIXES):
            return True

        # NOTE(review): targets starting with "/" are classified as external, so
        # absolute POSIX paths are never uploaded - confirm this is intended.
        return path_or_url.startswith("/")

    async def _upload_local_file(self, file_path: str) -> FileUploadFileWithCaption:
        """Upload the file at ``file_path`` and return file data referencing the upload id."""
        path = Path(file_path)
        self.logger.debug("Uploading local file: '%s'", path)
        upload_response = await self._file_upload_service.upload_file(path)

        return FileUploadFileWithCaption(
            file_upload=FileUploadedFileData(id=upload_response.id),
        )
|
|
@@ -1,42 +1,15 @@
|
|
|
1
|
-
"""Parser for image blocks."""
|
|
2
|
-
|
|
3
1
|
from typing import override
|
|
4
2
|
|
|
5
|
-
from notionary.blocks.schemas import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
FileData,
|
|
9
|
-
FileType,
|
|
10
|
-
)
|
|
11
|
-
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
12
|
-
from notionary.page.content.syntax import SyntaxRegistry
|
|
13
|
-
|
|
3
|
+
from notionary.blocks.schemas import CreateImageBlock, ExternalFileWithCaption
|
|
4
|
+
from notionary.page.content.parser.parsers.file_like_block import FileLikeBlockParser
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition, SyntaxRegistry
|
|
14
6
|
|
|
15
|
-
class ImageParser(LineParser):
|
|
16
|
-
def __init__(self, syntax_registry: SyntaxRegistry) -> None:
|
|
17
|
-
super().__init__(syntax_registry)
|
|
18
|
-
self._syntax = syntax_registry.get_image_syntax()
|
|
19
7
|
|
|
8
|
+
class ImageParser(FileLikeBlockParser[CreateImageBlock]):
|
|
20
9
|
@override
|
|
21
|
-
def
|
|
22
|
-
|
|
23
|
-
return False
|
|
24
|
-
return self._syntax.regex_pattern.search(context.line) is not None
|
|
10
|
+
def _get_syntax(self, syntax_registry: SyntaxRegistry) -> SyntaxDefinition:
|
|
11
|
+
return syntax_registry.get_image_syntax()
|
|
25
12
|
|
|
26
13
|
@override
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
if not url:
|
|
30
|
-
return
|
|
31
|
-
|
|
32
|
-
image_data = FileData(
|
|
33
|
-
type=FileType.EXTERNAL,
|
|
34
|
-
external=ExternalFile(url=url),
|
|
35
|
-
caption=[],
|
|
36
|
-
)
|
|
37
|
-
block = CreateImageBlock(image=image_data)
|
|
38
|
-
context.result_blocks.append(block)
|
|
39
|
-
|
|
40
|
-
def _extract_url(self, line: str) -> str | None:
|
|
41
|
-
match = self._syntax.regex_pattern.search(line)
|
|
42
|
-
return match.group(1).strip() if match else None
|
|
14
|
+
def _create_block(self, file_data: ExternalFileWithCaption) -> CreateImageBlock:
|
|
15
|
+
return CreateImageBlock(image=file_data)
|
|
@@ -1,42 +1,15 @@
|
|
|
1
|
-
"""Parser for PDF blocks."""
|
|
2
|
-
|
|
3
1
|
from typing import override
|
|
4
2
|
|
|
5
|
-
from notionary.blocks.schemas import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
FileData,
|
|
9
|
-
FileType,
|
|
10
|
-
)
|
|
11
|
-
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
12
|
-
from notionary.page.content.syntax import SyntaxRegistry
|
|
13
|
-
|
|
3
|
+
from notionary.blocks.schemas import CreatePdfBlock, ExternalFileWithCaption
|
|
4
|
+
from notionary.page.content.parser.parsers.file_like_block import FileLikeBlockParser
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition, SyntaxRegistry
|
|
14
6
|
|
|
15
|
-
class PdfParser(LineParser):
|
|
16
|
-
def __init__(self, syntax_registry: SyntaxRegistry) -> None:
|
|
17
|
-
super().__init__(syntax_registry)
|
|
18
|
-
self._syntax = syntax_registry.get_pdf_syntax()
|
|
19
7
|
|
|
8
|
+
class PdfParser(FileLikeBlockParser[CreatePdfBlock]):
|
|
20
9
|
@override
|
|
21
|
-
def
|
|
22
|
-
|
|
23
|
-
return False
|
|
24
|
-
return self._syntax.regex_pattern.search(context.line) is not None
|
|
10
|
+
def _get_syntax(self, syntax_registry: SyntaxRegistry) -> SyntaxDefinition:
|
|
11
|
+
return syntax_registry.get_pdf_syntax()
|
|
25
12
|
|
|
26
13
|
@override
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
if not url:
|
|
30
|
-
return
|
|
31
|
-
|
|
32
|
-
pdf_data = FileData(
|
|
33
|
-
type=FileType.EXTERNAL,
|
|
34
|
-
external=ExternalFile(url=url),
|
|
35
|
-
caption=[],
|
|
36
|
-
)
|
|
37
|
-
block = CreatePdfBlock(pdf=pdf_data)
|
|
38
|
-
context.result_blocks.append(block)
|
|
39
|
-
|
|
40
|
-
def _extract_url(self, line: str) -> str | None:
|
|
41
|
-
match = self._syntax.regex_pattern.search(line)
|
|
42
|
-
return match.group(1).strip() if match else None
|
|
14
|
+
def _create_block(self, file_data: ExternalFileWithCaption) -> CreatePdfBlock:
|
|
15
|
+
return CreatePdfBlock(pdf=file_data)
|
|
@@ -1,42 +1,15 @@
|
|
|
1
|
-
"""Parser for video blocks."""
|
|
2
|
-
|
|
3
1
|
from typing import override
|
|
4
2
|
|
|
5
|
-
from notionary.blocks.schemas import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
FileData,
|
|
9
|
-
FileType,
|
|
10
|
-
)
|
|
11
|
-
from notionary.page.content.parser.parsers.base import BlockParsingContext, LineParser
|
|
12
|
-
from notionary.page.content.syntax import SyntaxRegistry
|
|
13
|
-
|
|
3
|
+
from notionary.blocks.schemas import CreateVideoBlock, ExternalFileWithCaption
|
|
4
|
+
from notionary.page.content.parser.parsers.file_like_block import FileLikeBlockParser
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition, SyntaxRegistry
|
|
14
6
|
|
|
15
|
-
class VideoParser(LineParser):
|
|
16
|
-
def __init__(self, syntax_registry: SyntaxRegistry) -> None:
|
|
17
|
-
super().__init__(syntax_registry)
|
|
18
|
-
self._syntax = syntax_registry.get_video_syntax()
|
|
19
7
|
|
|
8
|
+
class VideoParser(FileLikeBlockParser[CreateVideoBlock]):
|
|
20
9
|
@override
|
|
21
|
-
def
|
|
22
|
-
|
|
23
|
-
return False
|
|
24
|
-
return self._syntax.regex_pattern.search(context.line) is not None
|
|
10
|
+
def _get_syntax(self, syntax_registry: SyntaxRegistry) -> SyntaxDefinition:
|
|
11
|
+
return syntax_registry.get_video_syntax()
|
|
25
12
|
|
|
26
13
|
@override
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
if not url:
|
|
30
|
-
return
|
|
31
|
-
|
|
32
|
-
video_data = FileData(
|
|
33
|
-
type=FileType.EXTERNAL,
|
|
34
|
-
external=ExternalFile(url=url),
|
|
35
|
-
caption=[],
|
|
36
|
-
)
|
|
37
|
-
block = CreateVideoBlock(video=video_data)
|
|
38
|
-
context.result_blocks.append(block)
|
|
39
|
-
|
|
40
|
-
def _extract_url(self, line: str) -> str | None:
|
|
41
|
-
match = self._syntax.regex_pattern.search(line)
|
|
42
|
-
return match.group(1).strip() if match else None
|
|
14
|
+
def _create_block(self, file_data: ExternalFileWithCaption) -> CreateVideoBlock:
|
|
15
|
+
return CreateVideoBlock(video=file_data)
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
from .column_syntax import ColumnSyntaxPreProcessor
|
|
2
2
|
from .indentation import IndentationNormalizer
|
|
3
3
|
from .port import PreProcessor
|
|
4
|
+
from .video_syntax import VideoFormatPreProcessor
|
|
4
5
|
from .whitespace import WhitespacePreProcessor
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
"ColumnSyntaxPreProcessor",
|
|
8
9
|
"IndentationNormalizer",
|
|
9
10
|
"PreProcessor",
|
|
11
|
+
"VideoFormatPreProcessor",
|
|
10
12
|
"WhitespacePreProcessor",
|
|
11
13
|
]
|
|
@@ -4,13 +4,14 @@ from typing import override
|
|
|
4
4
|
from notionary.exceptions.block_parsing import InsufficientColumnsError, InvalidColumnRatioSumError
|
|
5
5
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
6
6
|
from notionary.page.content.syntax import MarkdownGrammar, SyntaxRegistry
|
|
7
|
+
from notionary.utils.decorators import time_execution_sync
|
|
7
8
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
8
9
|
|
|
9
|
-
RATIO_TOLERANCE = 0.0001
|
|
10
|
-
MINIMUM_COLUMNS = 2
|
|
11
|
-
|
|
12
10
|
|
|
13
11
|
class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
12
|
+
_RATIO_TOLERANCE = 0.0001
|
|
13
|
+
_MINIMUM_COLUMNS = 2
|
|
14
|
+
|
|
14
15
|
def __init__(
|
|
15
16
|
self, syntax_registry: SyntaxRegistry | None = None, markdown_grammar: MarkdownGrammar | None = None
|
|
16
17
|
) -> None:
|
|
@@ -24,6 +25,7 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
24
25
|
self._column_pattern = self._syntax_registry.get_column_syntax().regex_pattern
|
|
25
26
|
|
|
26
27
|
@override
|
|
28
|
+
@time_execution_sync()
|
|
27
29
|
def process(self, markdown_text: str) -> str:
|
|
28
30
|
if not self._contains_column_lists(markdown_text):
|
|
29
31
|
return markdown_text
|
|
@@ -96,8 +98,10 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
96
98
|
return list(self._column_pattern.finditer(content))
|
|
97
99
|
|
|
98
100
|
def _validate_minimum_column_count(self, column_count: int) -> None:
|
|
99
|
-
if column_count <
|
|
100
|
-
self.logger.error(
|
|
101
|
+
if column_count < self._MINIMUM_COLUMNS:
|
|
102
|
+
self.logger.error(
|
|
103
|
+
f"Column list must contain at least {self._MINIMUM_COLUMNS} columns, found {column_count}"
|
|
104
|
+
)
|
|
101
105
|
raise InsufficientColumnsError(column_count)
|
|
102
106
|
|
|
103
107
|
def _extract_column_ratios(self, column_matches: list[re.Match]) -> list[float]:
|
|
@@ -120,11 +124,11 @@ class ColumnSyntaxPreProcessor(PreProcessor, LoggingMixin):
|
|
|
120
124
|
total_ratio = sum(ratios)
|
|
121
125
|
|
|
122
126
|
if not self._is_ratio_sum_valid(total_ratio):
|
|
123
|
-
self.logger.error(f"Column ratios must sum to 1.0 (±{
|
|
124
|
-
raise InvalidColumnRatioSumError(total_ratio,
|
|
127
|
+
self.logger.error(f"Column ratios must sum to 1.0 (±{self._RATIO_TOLERANCE}), but sum to {total_ratio:.4f}")
|
|
128
|
+
raise InvalidColumnRatioSumError(total_ratio, self._RATIO_TOLERANCE)
|
|
125
129
|
|
|
126
130
|
def _should_validate_ratios(self, ratios: list[float], column_count: int) -> bool:
|
|
127
131
|
return len(ratios) > 0 and len(ratios) == column_count
|
|
128
132
|
|
|
129
133
|
def _is_ratio_sum_valid(self, total: float) -> bool:
|
|
130
|
-
return abs(total - 1.0) <=
|
|
134
|
+
return abs(total - 1.0) <= self._RATIO_TOLERANCE
|
|
@@ -3,6 +3,7 @@ from typing import override
|
|
|
3
3
|
|
|
4
4
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
5
5
|
from notionary.page.content.syntax import MarkdownGrammar, SyntaxRegistry
|
|
6
|
+
from notionary.utils.decorators import time_execution_sync
|
|
6
7
|
from notionary.utils.mixins.logging import LoggingMixin
|
|
7
8
|
|
|
8
9
|
|
|
@@ -18,6 +19,7 @@ class IndentationNormalizer(PreProcessor, LoggingMixin):
|
|
|
18
19
|
self._code_block_start_delimiter = self._syntax_registry.get_code_syntax().start_delimiter
|
|
19
20
|
|
|
20
21
|
@override
|
|
22
|
+
@time_execution_sync()
|
|
21
23
|
def process(self, markdown_text: str) -> str:
|
|
22
24
|
if self._is_empty(markdown_text):
|
|
23
25
|
return ""
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import override
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
|
|
5
|
+
from notionary.blocks.enums import VideoFileType
|
|
6
|
+
from notionary.exceptions import UnsupportedVideoFormatError
|
|
7
|
+
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
8
|
+
from notionary.page.content.syntax import SyntaxRegistry
|
|
9
|
+
from notionary.utils.decorators import time_execution_sync
|
|
10
|
+
from notionary.utils.mixins.logging import LoggingMixin
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class VideoFormatPreProcessor(PreProcessor, LoggingMixin):
    """Pre-processing pass that rejects video blocks with an unsupported target.

    Every line matching the video syntax must point at a YouTube watch/embed
    URL or at a target whose extension ``VideoFileType`` accepts; otherwise
    ``UnsupportedVideoFormatError`` is raised. The text itself is returned unchanged.
    """

    YOUTUBE_WATCH_PATTERN = re.compile(r"^https?://(?:www\.)?youtube\.com/watch\?.*v=[\w-]+", re.IGNORECASE)
    YOUTUBE_EMBED_PATTERN = re.compile(r"^https?://(?:www\.)?youtube\.com/embed/[\w-]+", re.IGNORECASE)

    def __init__(self, syntax_registry: SyntaxRegistry | None = None) -> None:
        """Look up the video syntax from the given registry (or a default ``SyntaxRegistry()``)."""
        super().__init__()
        self._syntax_registry = syntax_registry or SyntaxRegistry()
        self._video_syntax = self._syntax_registry.get_video_syntax()

    @override
    @time_execution_sync()
    def process(self, markdown_text: str) -> str:
        """Validate each line and return the text re-joined with newlines.

        Raises:
            UnsupportedVideoFormatError: If a video block targets an unsupported format.
        """
        lines = markdown_text.split("\n")
        return "\n".join([self._validate_or_reject_line(line) for line in lines])

    def _validate_or_reject_line(self, line: str) -> str:
        """Return ``line`` unchanged, or raise if it holds an unsupported video target."""
        # Search once and reuse the match instead of re-running the regex to extract the URL.
        match = self._video_syntax.regex_pattern.search(line)
        if match is None:
            return line

        url = match.group(1).strip()
        if self._is_supported_video_url(url):
            return line

        supported_formats = list(VideoFileType.get_all_extensions())
        raise UnsupportedVideoFormatError(url, supported_formats)

    def _contains_video_block(self, line: str) -> bool:
        """Return True when the line matches the video syntax."""
        return self._video_syntax.regex_pattern.search(line) is not None

    def _extract_url_from_video_block(self, line: str) -> str:
        """Return the stripped first capture group of the video syntax, or "" when there is no match."""
        match = self._video_syntax.regex_pattern.search(line)
        return match.group(1).strip() if match else ""

    def _is_supported_video_url(self, url: str) -> bool:
        """Return True for YouTube URLs and for targets with an accepted video extension."""
        return (
            self._is_youtube_video(url)
            or self._has_valid_video_extension(url)
            or self._url_path_has_valid_extension(url)
        )

    def _is_youtube_video(self, url: str) -> bool:
        """Return True when the URL matches the YouTube watch or embed pattern."""
        return bool(self.YOUTUBE_WATCH_PATTERN.match(url) or self.YOUTUBE_EMBED_PATTERN.match(url))

    def _has_valid_video_extension(self, url: str) -> bool:
        """Check the raw target string against ``VideoFileType``."""
        return VideoFileType.is_valid_extension(url)

    def _url_path_has_valid_extension(self, url: str) -> bool:
        """Check only the lower-cased path component, so query strings and fragments are ignored.

        A URL that ``urlparse`` rejects is treated as unsupported.
        """
        try:
            path = urlparse(url).path
        except ValueError:
            # urlparse signals malformed input (e.g. an invalid bracketed host) with ValueError.
            return False
        return VideoFileType.is_valid_extension(path.lower())
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
3
|
from notionary.page.content.parser.pre_processsing.handlers.port import PreProcessor
|
|
4
|
+
from notionary.utils.decorators import time_execution_sync
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class WhitespacePreProcessor(PreProcessor):
|
|
7
8
|
@override
|
|
9
|
+
@time_execution_sync()
|
|
8
10
|
def process(self, markdown_text: str) -> str:
|
|
9
11
|
if not markdown_text:
|
|
10
12
|
return ""
|
|
@@ -1,31 +1,19 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.
|
|
3
|
+
from notionary.blocks.schemas import Block, BlockType, ExternalFileWithCaption, NotionHostedFileWithCaption
|
|
4
|
+
from notionary.page.content.renderer.renderers.file_like_block import FileLikeBlockRenderer
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class AudioRenderer(
|
|
8
|
+
class AudioRenderer(FileLikeBlockRenderer):
|
|
8
9
|
@override
|
|
9
10
|
def _can_handle(self, block: Block) -> bool:
|
|
10
11
|
return block.type == BlockType.AUDIO
|
|
11
12
|
|
|
12
13
|
@override
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
def _get_syntax(self) -> SyntaxDefinition:
|
|
15
|
+
return self._syntax_registry.get_audio_syntax()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
syntax = self._syntax_registry.get_audio_syntax()
|
|
20
|
-
return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
|
|
21
|
-
|
|
22
|
-
def _extract_audio_url(self, block: Block) -> str:
|
|
23
|
-
if not block.audio:
|
|
24
|
-
return ""
|
|
25
|
-
|
|
26
|
-
if block.audio.external:
|
|
27
|
-
return block.audio.external.url or ""
|
|
28
|
-
elif block.audio.file:
|
|
29
|
-
return block.audio.file.url or ""
|
|
30
|
-
|
|
31
|
-
return ""
|
|
17
|
+
@override
|
|
18
|
+
def _get_file_data(self, block: Block) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
|
+
return block.audio
|
|
@@ -1,34 +1,22 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.
|
|
3
|
+
from notionary.blocks.schemas import Block, BlockType, ExternalFileWithCaption, NotionHostedFileWithCaption
|
|
4
|
+
from notionary.page.content.renderer.renderers.file_like_block import FileLikeBlockRenderer
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class FileRenderer(
|
|
8
|
+
class FileRenderer(FileLikeBlockRenderer):
|
|
8
9
|
@override
|
|
9
10
|
def _can_handle(self, block: Block) -> bool:
|
|
10
11
|
return block.type == BlockType.FILE
|
|
11
12
|
|
|
12
13
|
@override
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
def _get_syntax(self) -> SyntaxDefinition:
|
|
15
|
+
return self._syntax_registry.get_file_syntax()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
syntax = self._syntax_registry.get_file_syntax()
|
|
20
|
-
return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
|
|
21
|
-
|
|
22
|
-
def _extract_file_url(self, block: Block) -> str:
|
|
23
|
-
if not block.file:
|
|
24
|
-
return ""
|
|
25
|
-
|
|
26
|
-
if block.file.external:
|
|
27
|
-
return block.file.external.url or ""
|
|
28
|
-
elif block.file.file:
|
|
29
|
-
return block.file.file.url or ""
|
|
30
|
-
|
|
31
|
-
return ""
|
|
17
|
+
@override
|
|
18
|
+
def _get_file_data(self, block: Block) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
|
+
return block.file
|
|
32
20
|
|
|
33
21
|
def _extract_file_name(self, block: Block) -> str:
|
|
34
22
|
if not block.file:
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import override
|
|
3
|
+
|
|
4
|
+
from notionary.blocks.schemas import (
|
|
5
|
+
Block,
|
|
6
|
+
ExternalFileWithCaption,
|
|
7
|
+
NotionHostedFileWithCaption,
|
|
8
|
+
)
|
|
9
|
+
from notionary.page.content.renderer.renderers.captioned_block import CaptionedBlockRenderer
|
|
10
|
+
from notionary.page.content.syntax import SyntaxDefinition
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FileLikeBlockRenderer(CaptionedBlockRenderer):
    """Base renderer for blocks whose main content is a file URL wrapped in syntax delimiters.

    Subclasses choose the syntax definition and select the file data on the block.
    """

    @abstractmethod
    def _get_syntax(self) -> SyntaxDefinition:
        """Return the syntax definition whose delimiters surround the rendered URL."""

    @abstractmethod
    def _get_file_data(self, block: Block) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
        """Return the file data stored on the block, or None when there is none."""

    @override
    async def _render_main_content(self, block: Block) -> str:
        """Render ``<start_delimiter><url><end_delimiter>``, or "" when the block has no URL."""
        url = self._extract_url(block)
        if url:
            definition = self._get_syntax()
            return f"{definition.start_delimiter}{url}{definition.end_delimiter}"
        return ""

    def _extract_url(self, block: Block) -> str:
        """Return the external or Notion-hosted URL of the block's file data, or "" if unavailable."""
        file_data = self._get_file_data(block)
        if not file_data:
            return ""

        # Pick the nested object carrying the URL according to the hosting kind.
        if isinstance(file_data, ExternalFileWithCaption):
            source = file_data.external
        elif isinstance(file_data, NotionHostedFileWithCaption):
            source = file_data.file
        else:
            return ""

        return source.url or ""
|
|
@@ -1,31 +1,19 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.
|
|
3
|
+
from notionary.blocks.schemas import Block, BlockType, ExternalFileWithCaption, NotionHostedFileWithCaption
|
|
4
|
+
from notionary.page.content.renderer.renderers.file_like_block import FileLikeBlockRenderer
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class ImageRenderer(
|
|
8
|
+
class ImageRenderer(FileLikeBlockRenderer):
|
|
8
9
|
@override
|
|
9
10
|
def _can_handle(self, block: Block) -> bool:
|
|
10
11
|
return block.type == BlockType.IMAGE
|
|
11
12
|
|
|
12
13
|
@override
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
def _get_syntax(self) -> SyntaxDefinition:
|
|
15
|
+
return self._syntax_registry.get_image_syntax()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
syntax = self._syntax_registry.get_image_syntax()
|
|
20
|
-
return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
|
|
21
|
-
|
|
22
|
-
def _extract_image_url(self, block: Block) -> str:
|
|
23
|
-
if not block.image:
|
|
24
|
-
return ""
|
|
25
|
-
|
|
26
|
-
if block.image.external:
|
|
27
|
-
return block.image.external.url or ""
|
|
28
|
-
elif block.image.file:
|
|
29
|
-
return block.image.file.url or ""
|
|
30
|
-
|
|
31
|
-
return ""
|
|
17
|
+
@override
|
|
18
|
+
def _get_file_data(self, block: Block) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
|
+
return block.image
|
|
@@ -1,31 +1,19 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.
|
|
3
|
+
from notionary.blocks.schemas import Block, BlockType, ExternalFileWithCaption, NotionHostedFileWithCaption
|
|
4
|
+
from notionary.page.content.renderer.renderers.file_like_block import FileLikeBlockRenderer
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class PdfRenderer(
|
|
8
|
+
class PdfRenderer(FileLikeBlockRenderer):
|
|
8
9
|
@override
|
|
9
10
|
def _can_handle(self, block: Block) -> bool:
|
|
10
11
|
return block.type == BlockType.PDF
|
|
11
12
|
|
|
12
13
|
@override
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
def _get_syntax(self) -> SyntaxDefinition:
|
|
15
|
+
return self._syntax_registry.get_pdf_syntax()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
syntax = self._syntax_registry.get_pdf_syntax()
|
|
20
|
-
return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
|
|
21
|
-
|
|
22
|
-
def _extract_pdf_url(self, block: Block) -> str:
|
|
23
|
-
if not block.pdf:
|
|
24
|
-
return ""
|
|
25
|
-
|
|
26
|
-
if block.pdf.external:
|
|
27
|
-
return block.pdf.external.url or ""
|
|
28
|
-
elif block.pdf.file:
|
|
29
|
-
return block.pdf.file.url or ""
|
|
30
|
-
|
|
31
|
-
return ""
|
|
17
|
+
@override
|
|
18
|
+
def _get_file_data(self, block: Block) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
|
+
return block.pdf
|
|
@@ -1,31 +1,19 @@
|
|
|
1
1
|
from typing import override
|
|
2
2
|
|
|
3
|
-
from notionary.blocks.schemas import Block, BlockType
|
|
4
|
-
from notionary.page.content.renderer.renderers.
|
|
3
|
+
from notionary.blocks.schemas import Block, BlockType, ExternalFileWithCaption, NotionHostedFileWithCaption
|
|
4
|
+
from notionary.page.content.renderer.renderers.file_like_block import FileLikeBlockRenderer
|
|
5
|
+
from notionary.page.content.syntax import SyntaxDefinition
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class VideoRenderer(
|
|
8
|
+
class VideoRenderer(FileLikeBlockRenderer):
|
|
8
9
|
@override
|
|
9
10
|
def _can_handle(self, block: Block) -> bool:
|
|
10
11
|
return block.type == BlockType.VIDEO
|
|
11
12
|
|
|
12
13
|
@override
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
def _get_syntax(self) -> SyntaxDefinition:
|
|
15
|
+
return self._syntax_registry.get_video_syntax()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
syntax = self._syntax_registry.get_video_syntax()
|
|
20
|
-
return f"{syntax.start_delimiter}{url}{syntax.end_delimiter}"
|
|
21
|
-
|
|
22
|
-
def _extract_video_url(self, block: Block) -> str:
|
|
23
|
-
if not block.video:
|
|
24
|
-
return ""
|
|
25
|
-
|
|
26
|
-
if block.video.external:
|
|
27
|
-
return block.video.external.url or ""
|
|
28
|
-
elif block.video.file:
|
|
29
|
-
return block.video.file.url or ""
|
|
30
|
-
|
|
31
|
-
return ""
|
|
17
|
+
@override
|
|
18
|
+
def _get_file_data(self, block: Block) -> ExternalFileWithCaption | NotionHostedFileWithCaption | None:
|
|
19
|
+
return block.video
|